Fix audio duration detection using librosa for accurate duration measurement
This commit is contained in:
@@ -17,6 +17,7 @@ from src.services.whisper_service import (
|
||||
from src.services.stats_service import log_usage
|
||||
from src.database.db import get_db
|
||||
from sqlalchemy.orm import Session
|
||||
import librosa
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -202,6 +203,13 @@ async def create_transcription(
|
||||
|
||||
file_size = len(content)
|
||||
|
||||
# Measure the audio duration from the file at temp_path with librosa (more reliable than Whisper's reported duration)
|
||||
try:
|
||||
audio_duration = librosa.get_duration(path=temp_path)
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not get audio duration: {e}")
|
||||
audio_duration = None
|
||||
|
||||
# Transcribe
|
||||
include_word_timestamps = timestamp_granularities and "word" in timestamp_granularities
|
||||
|
||||
@@ -213,13 +221,16 @@ async def create_transcription(
|
||||
|
||||
processing_time = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Use the librosa-measured audio_duration when it is truthy (not None and non-zero); otherwise fall back to Whisper's reported duration
|
||||
duration_to_log = audio_duration if audio_duration else result.get("duration")
|
||||
|
||||
# Log usage
|
||||
await log_usage(
|
||||
db=db,
|
||||
api_key=api_key,
|
||||
endpoint="/v1/audio/transcriptions",
|
||||
file_size=file_size,
|
||||
duration=result.get("duration"),
|
||||
duration=duration_to_log,
|
||||
processing_time=processing_time,
|
||||
model=settings.whisper_model,
|
||||
status="success"
|
||||
|
||||
Reference in New Issue
Block a user