Fix audio duration detection: use librosa to measure duration accurately

This commit is contained in:
Dominic Ballenthin
2026-01-29 12:04:44 +01:00
parent 9aa911ac45
commit eb9c56760c

View File

@@ -17,6 +17,7 @@ from src.services.whisper_service import (
from src.services.stats_service import log_usage from src.services.stats_service import log_usage
from src.database.db import get_db from src.database.db import get_db
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
import librosa
router = APIRouter() router = APIRouter()
@@ -202,6 +203,13 @@ async def create_transcription(
file_size = len(content) file_size = len(content)
# Get audio duration using librosa (more reliable than whisper's duration)
try:
audio_duration = librosa.get_duration(path=temp_path)
except Exception as e:
print(f"Warning: Could not get audio duration: {e}")
audio_duration = None
# Transcribe # Transcribe
include_word_timestamps = timestamp_granularities and "word" in timestamp_granularities include_word_timestamps = timestamp_granularities and "word" in timestamp_granularities
@@ -213,13 +221,16 @@ async def create_transcription(
processing_time = int((time.time() - start_time) * 1000) processing_time = int((time.time() - start_time) * 1000)
# Use audio_duration when it is truthy (not None and non-zero); otherwise fall back to whisper's duration
duration_to_log = audio_duration if audio_duration else result.get("duration")
# Log usage # Log usage
await log_usage( await log_usage(
db=db, db=db,
api_key=api_key, api_key=api_key,
endpoint="/v1/audio/transcriptions", endpoint="/v1/audio/transcriptions",
file_size=file_size, file_size=file_size,
duration=result.get("duration"), duration=duration_to_log,
processing_time=processing_time, processing_time=processing_time,
model=settings.whisper_model, model=settings.whisper_model,
status="success" status="success"