# whisper-api/src/api/transcriptions.py

import hashlib
import hmac
import os
import time
from typing import Optional, List

from fastapi import APIRouter, File, UploadFile, Form, HTTPException, Depends, Header
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from src.config import settings
from src.services.whisper_service import transcribe_audio
from src.services.stats_service import log_usage
from src.database.db import get_db

router = APIRouter()


from src.services.stats_service import hash_api_key
from src.database.db import SessionLocal
from src.database.models import ApiKey

def verify_api_key(authorization: Optional[str] = Header(None)):
    """Validate the bearer token carried in the ``Authorization`` header.

    The token is accepted when it matches one of the keys returned by
    ``settings.get_api_keys_list()`` or when its hash (``hash_api_key``)
    matches an ``ApiKey`` row with ``is_active`` set.

    Args:
        authorization: Raw ``Authorization`` header value, or ``None`` when
            the header is absent.

    Returns:
        The API key string extracted from the header.

    Raises:
        HTTPException: 401 when the header is missing, is not of the form
            ``Bearer <token>``, or the token is empty or unknown.
    """
    if not authorization:
        raise HTTPException(status_code=401, detail="Authorization header missing")

    # Split off the scheme once instead of str.replace(), which would also
    # strip any later "Bearer " occurrence inside the token itself.
    scheme, separator, token = authorization.partition(" ")
    # HTTP auth scheme names are case-insensitive, so "bearer" is accepted too.
    if not separator or scheme.lower() != "bearer":
        raise HTTPException(status_code=401, detail="Invalid authorization format")

    api_key = token.strip()
    if not api_key:
        # An empty token must never reach the lookups below: it would match
        # an empty entry in the configured key list.
        raise HTTPException(status_code=401, detail="Invalid API key")

    # Check environment variable keys first.
    # Compare as bytes (compare_digest rejects non-ASCII str) and visit every
    # configured key without short-circuiting to limit timing side channels.
    # Assumes get_api_keys_list() yields str entries.
    candidate = api_key.encode("utf-8")
    env_match = False
    for configured_key in settings.get_api_keys_list():
        if hmac.compare_digest(candidate, configured_key.encode("utf-8")):
            env_match = True
    if env_match:
        return api_key

    # Check database keys (looked up by hash of the presented key).
    db = SessionLocal()
    try:
        key_hash = hash_api_key(api_key)
        db_key = db.query(ApiKey).filter(
            ApiKey.key_hash == key_hash,
            ApiKey.is_active == True  # noqa: E712 - SQLAlchemy column expression
        ).first()

        if db_key:
            return api_key
    finally:
        # Always release the session, including on the early return above.
        db.close()

    raise HTTPException(status_code=401, detail="Invalid API key")


@router.get("/models")
async def list_models(api_key: str = Depends(verify_api_key)):
    """List the model IDs this API exposes (OpenAI-compatible payload)"""
    # ``api_key`` is only declared so the auth dependency runs; its value is
    # not used here.
    # (model id, "created" timestamp) pairs for the static catalogue.
    catalogue = (
        ("whisper-1", 1677532384),
        ("large-v3", 1698796800),
    )
    entries = [
        {
            "id": model_id,
            "object": "model",
            "created": created_at,
            "owned_by": "openai",
        }
        for model_id, created_at in catalogue
    ]
    return {"data": entries}


async def _log_transcription_failure(db, api_key, file_size, start_time, message):
    """Record a failed transcription request through ``log_usage``."""
    await log_usage(
        db=db,
        api_key=api_key,
        endpoint="/v1/audio/transcriptions",
        file_size=file_size,
        duration=None,
        processing_time=int((time.time() - start_time) * 1000),
        model=settings.whisper_model,
        status="error",
        error_message=message
    )


@router.post("/audio/transcriptions")
async def create_transcription(
    file: UploadFile = File(...),
    model: str = Form("whisper-1"),
    language: Optional[str] = Form(None),
    prompt: Optional[str] = Form(None),
    response_format: str = Form("json"),
    temperature: float = Form(0.0),
    timestamp_granularities: Optional[List[str]] = Form(None),
    api_key: str = Depends(verify_api_key),
    db: Session = Depends(get_db)
):
    """
    Transcribe audio file (OpenAI compatible endpoint)

    - **file**: Audio file (mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg, oga, opus, flac)
    - **model**: Model ID (accepted for compatibility; not used to select a model here)
    - **language**: Language code (e.g., 'de', 'en')
    - **response_format**: `text` returns the transcript string, `verbose_json`
      returns the full transcription result, any other value returns `{"text": ...}`
    - **timestamp_granularities**: include `word` to request word-level timestamps

    Responds with 400 for an unsupported file extension and 500 when
    transcription fails.
    """
    # NOTE: ``prompt`` and ``temperature`` are accepted but not forwarded to
    # ``transcribe_audio`` by this handler.

    start_time = time.time()
    temp_path = None
    # Initialised up front so the error paths can log it without probing locals().
    file_size = None

    try:
        # Validate file type
        allowed_extensions = {'.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm', '.ogg', '.oga', '.opus', '.flac'}
        # ``filename`` may be None for unnamed parts; treat that as "no extension".
        file_ext = os.path.splitext(file.filename or "")[1].lower()

        if file_ext not in allowed_extensions:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported file format: {file_ext}"
            )

        # Save uploaded file.
        # The on-disk name is built only from server-side values: the client
        # supplied filename could contain path separators ("../"), and the raw
        # API key must not leak into file names, so a short hash is used as tag.
        key_tag = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
        temp_filename = f"{key_tag}_{int(time.time())}_{os.urandom(8).hex()}{file_ext}"
        temp_path = os.path.join(settings.uploads_path, temp_filename)

        # Copy in chunks so large uploads are not held in memory all at once.
        bytes_written = 0
        with open(temp_path, "wb") as f:
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
                bytes_written += len(chunk)

        file_size = bytes_written

        # Transcribe
        # Normalise to a real bool (the bare ``and`` could yield None or a list).
        include_word_timestamps = bool(timestamp_granularities) and "word" in timestamp_granularities

        result = await transcribe_audio(
            audio_path=temp_path,
            language=language,
            include_word_timestamps=include_word_timestamps
        )

        processing_time = int((time.time() - start_time) * 1000)

        # Log usage
        await log_usage(
            db=db,
            api_key=api_key,
            endpoint="/v1/audio/transcriptions",
            file_size=file_size,
            duration=result.get("duration"),
            processing_time=processing_time,
            model=settings.whisper_model,
            status="success"
        )

        # Format response based on requested format
        if response_format == "text":
            return result["text"]
        elif response_format == "verbose_json":
            return result
        else:
            return {"text": result["text"]}

    except HTTPException as exc:
        # Deliberate HTTP errors (e.g. the 400 above) keep their status code
        # instead of being rewrapped as a 500; they are still logged.
        await _log_transcription_failure(db, api_key, file_size, start_time, str(exc.detail))
        raise

    except Exception as exc:
        await _log_transcription_failure(db, api_key, file_size, start_time, str(exc))
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    finally:
        # Cleanup temp file (best effort: a failed delete must not mask the response)
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass