Fix dashboard download status display, add CPU/GPU toggle and automatic fallback
This commit is contained in:
@@ -10,7 +10,7 @@ router = APIRouter()
|
|||||||
|
|
||||||
@router.get("/health")
|
@router.get("/health")
|
||||||
async def health_check():
|
async def health_check():
|
||||||
"""Health check endpoint with GPU status"""
|
"""Health check endpoint with GPU and model status"""
|
||||||
|
|
||||||
# Get GPU info if available
|
# Get GPU info if available
|
||||||
gpu_info = {"available": False}
|
gpu_info = {"available": False}
|
||||||
@@ -30,11 +30,21 @@ async def health_check():
|
|||||||
# Get system info
|
# Get system info
|
||||||
memory = psutil.virtual_memory()
|
memory = psutil.virtual_memory()
|
||||||
|
|
||||||
|
# Get model status
|
||||||
|
model_status = get_model_info()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "healthy",
|
"status": "healthy",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"model": settings.whisper_model,
|
"model": settings.whisper_model,
|
||||||
"gpu": gpu_info,
|
"gpu": gpu_info,
|
||||||
|
"model_status": {
|
||||||
|
"loaded": model_status.get("loaded", False),
|
||||||
|
"is_downloading": model_status.get("is_downloading", False),
|
||||||
|
"download_percentage": model_status.get("download_percentage", 0),
|
||||||
|
"status_message": model_status.get("status_message", "Unknown"),
|
||||||
|
"device": model_status.get("device", "cpu")
|
||||||
|
},
|
||||||
"system": {
|
"system": {
|
||||||
"cpu_percent": psutil.cpu_percent(interval=1),
|
"cpu_percent": psutil.cpu_percent(interval=1),
|
||||||
"memory_percent": memory.percent,
|
"memory_percent": memory.percent,
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ def _download_hook(progress_bytes, total_bytes):
|
|||||||
|
|
||||||
|
|
||||||
def load_model(model_name: str = None):
|
def load_model(model_name: str = None):
|
||||||
"""Load Whisper model"""
|
"""Load Whisper model with automatic CPU fallback"""
|
||||||
global _model, _model_status, _current_model_name
|
global _model, _model_status, _current_model_name
|
||||||
|
|
||||||
if model_name is None:
|
if model_name is None:
|
||||||
@@ -70,12 +70,21 @@ def load_model(model_name: str = None):
|
|||||||
_model_status["status_message"] = "Starting download..."
|
_model_status["status_message"] = "Starting download..."
|
||||||
_model_status["model_name"] = model_name
|
_model_status["model_name"] = model_name
|
||||||
|
|
||||||
print(f"Loading Whisper model: {model_name}")
|
# Check if GPU is available, fallback to CPU if not
|
||||||
|
requested_device = settings.whisper_device
|
||||||
|
actual_device = requested_device
|
||||||
|
|
||||||
|
if requested_device == "cuda" and not torch.cuda.is_available():
|
||||||
|
actual_device = "cpu"
|
||||||
|
print(f"⚠️ GPU not available, falling back to CPU mode")
|
||||||
|
_model_status["status_message"] = "GPU not available, using CPU..."
|
||||||
|
|
||||||
|
print(f"Loading Whisper model: {model_name} on {actual_device}")
|
||||||
try:
|
try:
|
||||||
# Whisper doesn't have a direct progress callback, but we can monitor the models directory
|
# Whisper doesn't have a direct progress callback, but we can monitor the models directory
|
||||||
_model = whisper.load_model(
|
_model = whisper.load_model(
|
||||||
model_name,
|
model_name,
|
||||||
device=settings.whisper_device,
|
device=actual_device,
|
||||||
download_root=settings.models_path
|
download_root=settings.models_path
|
||||||
)
|
)
|
||||||
_current_model_name = model_name
|
_current_model_name = model_name
|
||||||
@@ -83,7 +92,14 @@ def load_model(model_name: str = None):
|
|||||||
_model_status["is_loaded"] = True
|
_model_status["is_loaded"] = True
|
||||||
_model_status["download_percentage"] = 100
|
_model_status["download_percentage"] = 100
|
||||||
_model_status["status_message"] = "Model loaded successfully"
|
_model_status["status_message"] = "Model loaded successfully"
|
||||||
print(f"Model {model_name} loaded on {settings.whisper_device}")
|
|
||||||
|
# Update device in status to reflect actual device used
|
||||||
|
if actual_device != requested_device:
|
||||||
|
settings.whisper_device = actual_device
|
||||||
|
print(f"✅ Model {model_name} loaded on CPU (GPU fallback)")
|
||||||
|
else:
|
||||||
|
print(f"✅ Model {model_name} loaded on {actual_device}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_model_status["is_downloading"] = False
|
_model_status["is_downloading"] = False
|
||||||
_model_status["status_message"] = f"Error: {str(e)}"
|
_model_status["status_message"] = f"Error: {str(e)}"
|
||||||
|
|||||||
@@ -151,39 +151,55 @@
|
|||||||
// Model Status Updates
|
// Model Status Updates
|
||||||
async function updateModelStatus() {
|
async function updateModelStatus() {
|
||||||
try {
|
try {
|
||||||
// We need to use an API key for the model-status endpoint
|
|
||||||
// For admin panel, we'll use a simple endpoint that doesn't require auth
|
|
||||||
// or we'll skip this and use the health endpoint instead
|
|
||||||
const response = await fetch('/health');
|
const response = await fetch('/health');
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
|
|
||||||
const statusCard = document.getElementById('model-status-content');
|
const statusCard = document.getElementById('model-status-content');
|
||||||
const progressContainer = document.getElementById('model-progress-container');
|
|
||||||
const progressBar = document.getElementById('model-progress-bar');
|
|
||||||
const progressText = document.getElementById('model-progress-text');
|
|
||||||
|
|
||||||
|
// Prioritize download status over GPU status
|
||||||
|
if (data.model_status && data.model_status.is_downloading) {
|
||||||
|
// Show download status
|
||||||
|
statusCard.innerHTML = `
|
||||||
|
<p><strong>Status:</strong> <span style="color: #ed8936;">⏳ Downloading...</span></p>
|
||||||
|
<p>${data.model_status.status_message}</p>
|
||||||
|
<div style="background: #e2e8f0; border-radius: 10px; height: 20px; overflow: hidden; margin-top: 10px;">
|
||||||
|
<div style="background: linear-gradient(90deg, #667eea, #764ba2); height: 100%; width: ${data.model_status.download_percentage}%; transition: width 0.5s ease;"></div>
|
||||||
|
</div>
|
||||||
|
<p style="text-align: center; margin-top: 5px; font-weight: bold; color: #667aea;">
|
||||||
|
${data.model_status.download_percentage}%
|
||||||
|
</p>
|
||||||
|
`;
|
||||||
|
} else if (data.model_status && data.model_status.loaded) {
|
||||||
|
// Show GPU/Model ready status
|
||||||
if (data.gpu && data.gpu.available) {
|
if (data.gpu && data.gpu.available) {
|
||||||
statusCard.innerHTML = `
|
statusCard.innerHTML = `
|
||||||
<p><strong>Status:</strong> <span style="color: #48bb78;">✅ GPU Ready</span></p>
|
<p><strong>Status:</strong> <span style="color: #48bb78;">✅ Loaded & Ready</span></p>
|
||||||
|
<p><strong>Model:</strong> ${data.model}</p>
|
||||||
|
<p><strong>Device:</strong> ${data.model_status.device}</p>
|
||||||
<p><strong>GPU:</strong> ${data.gpu.name}</p>
|
<p><strong>GPU:</strong> ${data.gpu.name}</p>
|
||||||
<p><strong>VRAM:</strong> ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB</p>
|
<p><strong>VRAM:</strong> ${data.gpu.vram_used_gb} GB / ${data.gpu.vram_total_gb} GB</p>
|
||||||
<p><strong>Model:</strong> ${data.model}</p>
|
|
||||||
`;
|
`;
|
||||||
progressContainer.style.display = 'none';
|
|
||||||
} else {
|
} else {
|
||||||
statusCard.innerHTML = `
|
statusCard.innerHTML = `
|
||||||
<p><strong>Status:</strong> <span style="color: #ed8936;">⏳ Loading...</span></p>
|
<p><strong>Status:</strong> <span style="color: #48bb78;">✅ Loaded & Ready (CPU Mode)</span></p>
|
||||||
<p>Model is being downloaded. Please wait...</p>
|
<p><strong>Model:</strong> ${data.model}</p>
|
||||||
|
<p><strong>Device:</strong> CPU</p>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Not started or loading
|
||||||
|
statusCard.innerHTML = `
|
||||||
|
<p><strong>Status:</strong> <span style="color: #718096;">⏸️ Not Started</span></p>
|
||||||
|
<p>The model will be loaded on first transcription request.</p>
|
||||||
`;
|
`;
|
||||||
progressContainer.style.display = 'block';
|
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching model status:', error);
|
console.error('Error fetching model status:', error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update status every 5 seconds
|
// Poll the /health endpoint every 3 seconds (always, not only during downloads) so progress updates smoothly
|
||||||
updateModelStatus();
|
updateModelStatus();
|
||||||
setInterval(updateModelStatus, 5000);
|
setInterval(updateModelStatus, 3000);
|
||||||
</script>
|
</script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -38,6 +38,39 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Device Mode Toggle -->
|
||||||
|
<div class="card">
|
||||||
|
<h2>⚙️ Processing Mode</h2>
|
||||||
|
<p>Current mode: <strong>{{ current_status.device | upper }}</strong></p>
|
||||||
|
|
||||||
|
{% if gpu_available %}
|
||||||
|
{% if current_status.device == 'cuda' %}
|
||||||
|
<form method="POST" action="/admin/models/set-device">
|
||||||
|
<input type="hidden" name="device" value="cpu">
|
||||||
|
<button type="submit" class="btn" style="background: #4299e1;">
|
||||||
|
🔄 Switch to CPU Mode
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
{% else %}
|
||||||
|
<form method="POST" action="/admin/models/set-device">
|
||||||
|
<input type="hidden" name="device" value="cuda">
|
||||||
|
<button type="submit" class="btn" style="background: #48bb78;">
|
||||||
|
🚀 Switch to GPU Mode
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
{% endif %}
|
||||||
|
<p style="margin-top: 10px; font-size: 12px; color: #666;">
|
||||||
|
GPU detected: {{ gpu_name }}
|
||||||
|
</p>
|
||||||
|
{% else %}
|
||||||
|
<div style="background: #fed7d7; padding: 10px; border-radius: 5px; margin-top: 10px;">
|
||||||
|
<p style="color: #742a2a; margin: 0;">
|
||||||
|
⚠️ No compatible GPU detected. Running in CPU mode.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Available Models -->
|
<!-- Available Models -->
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h2>Available Models</h2>
|
<h2>Available Models</h2>
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ from fastapi.responses import HTMLResponse, RedirectResponse
|
|||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
from src.config import settings
|
from src.config import settings
|
||||||
from src.database.db import get_db
|
from src.database.db import get_db
|
||||||
from src.database.models import ApiKey, UsageLog
|
from src.database.models import ApiKey, UsageLog
|
||||||
@@ -201,12 +203,20 @@ async def manage_models(request: Request, message: Optional[str] = None, error:
|
|||||||
models = get_available_models()
|
models = get_available_models()
|
||||||
current_status = get_model_status()
|
current_status = get_model_status()
|
||||||
|
|
||||||
|
# Check GPU availability
|
||||||
|
gpu_available = torch.cuda.is_available()
|
||||||
|
gpu_name = ""
|
||||||
|
if gpu_available:
|
||||||
|
gpu_name = torch.cuda.get_device_name(0)
|
||||||
|
|
||||||
return templates.TemplateResponse("models.html", {
|
return templates.TemplateResponse("models.html", {
|
||||||
"request": request,
|
"request": request,
|
||||||
"models": models,
|
"models": models,
|
||||||
"current_status": current_status,
|
"current_status": current_status,
|
||||||
"message": message,
|
"message": message,
|
||||||
"error": error
|
"error": error,
|
||||||
|
"gpu_available": gpu_available,
|
||||||
|
"gpu_name": gpu_name
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
@@ -316,3 +326,57 @@ async def reload_model_admin(request: Request):
|
|||||||
url=f"/admin/models?error={str(e)}",
|
url=f"/admin/models?error={str(e)}",
|
||||||
status_code=302
|
status_code=302
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/models/set-device")
async def set_device_admin(
    request: Request,
    device: str = Form(...)
):
    """Switch the Whisper processing device between CPU and GPU.

    Validates the requested device, stores it in ``settings.whisper_device``
    and drops the model held by the whisper service so it is not reused on
    the previous device.

    Args:
        request: Incoming admin request; passed to ``check_admin_auth``.
        device: Target device from the submitted form, ``"cpu"`` or ``"cuda"``.

    Returns:
        A 302 redirect to ``/admin/login`` when ``check_admin_auth`` raises
        ``HTTPException``; otherwise a 302 redirect to ``/admin/models`` with
        either a ``message`` or an ``error`` query parameter.
    """
    # Function-scope imports keep this handler self-contained.
    import gc
    from urllib.parse import quote

    try:
        check_admin_auth(request)
    except HTTPException:
        return RedirectResponse(url="/admin/login", status_code=302)

    try:
        # Validate device
        if device not in ("cpu", "cuda"):
            return RedirectResponse(
                url="/admin/models?error=Invalid device. Must be 'cpu' or 'cuda'",
                status_code=302
            )

        # Refuse to switch to cuda when no GPU is usable
        if device == "cuda" and not torch.cuda.is_available():
            return RedirectResponse(
                url="/admin/models?error=No compatible GPU detected. Cannot switch to GPU mode.",
                status_code=302
            )

        # Update settings
        settings.whisper_device = device

        # Import the module itself rather than the name: assigning to a name
        # obtained via `from ... import _model` would only rebind a local
        # variable and leave the service's model loaded on the old device.
        import src.services.whisper_service as whisper_service

        if whisper_service._model is not None:
            # Drop the service's reference so the model can be collected
            whisper_service._model = None
            gc.collect()

            # Release cached CUDA memory held by the discarded model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # NOTE(review): the service's status dict is not reset here;
            # confirm whether it should also be marked as not loaded.

        device_name = "GPU" if device == "cuda" else "CPU"
        return RedirectResponse(
            url=f"/admin/models?message=Switched to {device_name} mode. Model will be reloaded on next request.",
            status_code=302
        )
    except Exception as e:
        # Percent-encode the message so characters such as '&', '#' or '='
        # cannot break the redirect's query string.
        return RedirectResponse(
            url=f"/admin/models?error={quote(str(e))}",
            status_code=302
        )
|
||||||
|
|||||||
Reference in New Issue
Block a user