Add intelligent GPU selector with automatic fallback to available GPU or CPU

This commit is contained in:
Dominic Ballenthin
2026-02-02 03:59:23 +01:00
parent eb9c56760c
commit 4a1f898b25

View File

@@ -51,6 +51,51 @@ def _download_hook(progress_bytes, total_bytes):
_model_status["status_message"] = f"Downloading: {_model_status['download_percentage']}%"
def get_best_gpu():
    """Find the best available GPU with the most free memory.

    Scans every visible CUDA device and picks the one with the largest
    amount of free memory, provided it has at least 3 GiB available
    (the minimum needed for the large-v3 Whisper model).

    Returns:
        int | None: index of the selected GPU, or ``None`` when CUDA is
        unavailable or no device has enough free memory.
    """
    if not torch.cuda.is_available():
        return None
    num_gpus = torch.cuda.device_count()
    if num_gpus == 0:
        return None

    # Need at least 3 GiB free for the large-v3 model.
    min_required_memory = 3 * 1024 * 1024 * 1024

    best_gpu = None
    max_free_memory = 0
    for gpu_id in range(num_gpus):
        try:
            props = torch.cuda.get_device_properties(gpu_id)
            # mem_get_info reports device-wide (free, total) bytes, so memory
            # held by OTHER processes counts too. The previous approach,
            # total - torch.cuda.memory_allocated(), only saw this process's
            # PyTorch allocations and made busy GPUs look free. This also
            # removes the need for a torch.cuda.set_device() side effect.
            free_memory, total_memory = torch.cuda.mem_get_info(gpu_id)
            used_memory = total_memory - free_memory
            print(f"  GPU {gpu_id}: {props.name}")
            print(f"    Total: {total_memory / 1e9:.2f} GB")
            print(f"    Used: {used_memory / 1e9:.2f} GB")
            print(f"    Free: {free_memory / 1e9:.2f} GB")
            if free_memory > min_required_memory and free_memory > max_free_memory:
                max_free_memory = free_memory
                best_gpu = gpu_id
        except Exception as e:
            # One faulty/unqueryable device must not abort the whole scan.
            print(f"  GPU {gpu_id}: Error checking - {e}")
            continue
    return best_gpu
def load_model(model_name: str = None):
"""Load Whisper model with automatic CPU fallback"""
global _model, _model_status, _current_model_name
# NOTE(review): this span is a web-scraped unified-diff fragment, not runnable
# code: indentation is lost, the hunk header below hides part of the function
# body (original lines 54-78), and several statements appear in BOTH their
# pre-change and post-change form. Flagged inline where visible. Recover the
# real function from the repository before editing.
@@ -79,31 +124,101 @@ def load_model(model_name: str = None):
print(f"⚠️ GPU not available, falling back to CPU mode")
_model_status["status_message"] = "GPU not available, using CPU..."
# NOTE(review): the next two prints look like the removed (old) and added
# (new) versions of the same statement from the diff; only one exists in
# the real file — presumably the second. Confirm against the repo.
print(f"Loading Whisper model: {model_name} on {actual_device}")
print(f"Loading Whisper model: {model_name}")
print("Checking available GPUs...")
try:
# Whisper doesn't have a direct progress callback, but we can monitor the models directory
# Try to load on the best available GPU first
if requested_device == "cuda":
best_gpu = get_best_gpu()
if best_gpu is not None:
print(f"🎯 Selected GPU {best_gpu} with most free memory")
try:
# Set the device before loading
torch.cuda.set_device(best_gpu)
# Load model on specific GPU
_model = whisper.load_model(
model_name,
device=f"cuda:{best_gpu}",
download_root=settings.models_path
)
_current_model_name = model_name
_model_status["is_downloading"] = False
_model_status["is_loaded"] = True
_model_status["download_percentage"] = 100
_model_status["status_message"] = f"Model loaded successfully on GPU {best_gpu}"
print(f"✅ Model {model_name} loaded on GPU {best_gpu}")
except RuntimeError as e:
# OOM/CUDA failure on the chosen GPU: retry every other GPU in
# index order before giving up and raising to the CPU fallback.
if "out of memory" in str(e).lower() or "cuda" in str(e).lower():
print(f"⚠️ GPU {best_gpu} out of memory, trying other GPUs...")
torch.cuda.empty_cache()
# Try other GPUs
for gpu_id in range(torch.cuda.device_count()):
if gpu_id == best_gpu:
continue
try:
torch.cuda.set_device(gpu_id)
_model = whisper.load_model(
model_name,
device=f"cuda:{gpu_id}",
download_root=settings.models_path
)
_current_model_name = model_name
_model_status["is_downloading"] = False
_model_status["is_loaded"] = True
_model_status["download_percentage"] = 100
_model_status["status_message"] = f"Model loaded on GPU {gpu_id}"
print(f"✅ Model {model_name} loaded on GPU {gpu_id}")
break
# NOTE(review): bare `except:` — swallows everything including
# KeyboardInterrupt/SystemExit while cycling GPUs; should be at
# least `except Exception:` in the real file.
except:
torch.cuda.empty_cache()
continue
# for/else: runs only if no GPU succeeded (no `break`).
else:
# All GPUs failed, fall back to CPU
print("⚠️ All GPUs full, falling back to CPU...")
raise Exception("All GPUs full")
else:
raise
else:
print("⚠️ No suitable GPU found, falling back to CPU...")
raise Exception("No suitable GPU")
# CPU path: reached when requested_device != "cuda" (presumably — the
# hidden hunk may set more state; confirm against the repo).
# If we get here with no model, use CPU
if _model is None:
_model = whisper.load_model(
model_name,
device="cpu",
download_root=settings.models_path
)
_current_model_name = model_name
_model_status["is_downloading"] = False
_model_status["is_loaded"] = True
_model_status["download_percentage"] = 100
_model_status["status_message"] = "Model loaded on CPU"
settings.whisper_device = "cpu"
print(f"✅ Model {model_name} loaded on CPU")
except Exception as e:
# Last-resort fallback: any failure above (incl. the sentinel
# "All GPUs full"/"No suitable GPU" exceptions) lands here and
# retries on CPU.
print(f"⚠️ Error loading model: {e}")
# Final fallback to CPU
_model = whisper.load_model(
model_name,
# NOTE(review): the next two `device=` lines are the old and new diff
# versions of one keyword argument; the real file has only device="cpu".
device=actual_device,
device="cpu",
download_root=settings.models_path
)
_current_model_name = model_name
_model_status["is_downloading"] = False
_model_status["is_loaded"] = True
_model_status["download_percentage"] = 100
# NOTE(review): everything from here to `raise` appears to be the REMOVED
# (pre-change) epilogue left in by the diff scrape; the three lines after
# `raise` look like the ADDED replacement. Only one set exists in the repo.
_model_status["status_message"] = "Model loaded successfully"
# Update device in status to reflect actual device used
if actual_device != requested_device:
settings.whisper_device = actual_device
print(f"✅ Model {model_name} loaded on CPU (GPU fallback)")
else:
print(f"✅ Model {model_name} loaded on {actual_device}")
except Exception as e:
_model_status["is_downloading"] = False
_model_status["status_message"] = f"Error: {str(e)}"
raise
_model_status["status_message"] = "Model loaded on CPU (error fallback)"
settings.whisper_device = "cpu"
print(f"✅ Model {model_name} loaded on CPU (after error)")
return _model