Initial commit: Whisper API with FastAPI, GPU support and Admin Dashboard

2026-01-28 23:16:44 +01:00
commit 008ef63bfd
28 changed files with 1871 additions and 0 deletions
--- a/src/api/transcriptions.py
+++ b/src/api/transcriptions.py
@@ -0,0 +1,160 @@
+from fastapi import APIRouter, File, UploadFile, Form, HTTPException, Depends, Header
+from fastapi.responses import JSONResponse
+from typing import Optional, List
+import time
+import os
+import hashlib
+
+from src.config import settings
+from src.services.whisper_service import transcribe_audio
+from src.services.stats_service import log_usage
+from src.database.db import get_db
+from sqlalchemy.orm import Session
+
+router = APIRouter()
+
+
+def verify_api_key(authorization: Optional[str] = Header(None)):
+    """Verify the API key carried in the ``Authorization`` header.
+
+    The header must have the form ``Bearer <key>``; the scheme name is
+    matched case-insensitively. The key is checked against the list
+    returned by ``settings.get_api_keys_list()``.
+
+    Returns:
+        The API key, stripped of surrounding whitespace.
+
+    Raises:
+        HTTPException: 401 when the header is missing, is not a Bearer
+            credential, or the key is not configured; 500 when no API
+            keys are configured at all.
+    """
+    # Function-scope import so this block does not depend on a change to
+    # the module's import section.
+    import hmac
+
+    if not authorization:
+        raise HTTPException(status_code=401, detail="Authorization header missing")
+
+    # Split on the first space only. str.replace() would also delete any
+    # later "Bearer " sequence that happens to be part of the key itself.
+    scheme, separator, credentials = authorization.partition(" ")
+    if not separator or scheme.lower() != "bearer":
+        raise HTTPException(status_code=401, detail="Invalid authorization format")
+
+    api_key = credentials.strip()
+    valid_keys = settings.get_api_keys_list()
+
+    if not valid_keys:
+        raise HTTPException(status_code=500, detail="No API keys configured")
+
+    # Constant-time comparison against every configured key; the list is
+    # built in full (no short-circuit) so response timing does not depend
+    # on which key matched. Keys are encoded to bytes because
+    # compare_digest() only accepts str arguments that are pure ASCII.
+    candidate = api_key.encode("utf-8")
+    matches = [
+        hmac.compare_digest(candidate, str(key).encode("utf-8"))
+        for key in valid_keys
+    ]
+    if not any(matches):
+        raise HTTPException(status_code=401, detail="Invalid API key")
+
+    return api_key
+
+
+@router.get("/models")
+async def list_models(api_key: str = Depends(verify_api_key)):
+    """List available models (OpenAI compatible).
+
+    Returns a static catalogue of two model entries wrapped in a list
+    envelope. The top-level ``"object": "list"`` field is included so the
+    payload has the same shape as OpenAI's model-list response.
+
+    ``api_key`` is not used in the body; the dependency is declared so
+    that the request must pass ``verify_api_key``.
+    """
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": "whisper-1",
+                "object": "model",
+                "created": 1677532384,
+                "owned_by": "openai",
+            },
+            {
+                "id": "large-v3",
+                "object": "model",
+                "created": 1698796800,
+                "owned_by": "openai",
+            },
+        ],
+    }
+
+
+@router.post("/audio/transcriptions")
+async def create_transcription(
+    file: UploadFile = File(...),
+    model: str = Form("whisper-1"),
+    language: Optional[str] = Form(None),
+    prompt: Optional[str] = Form(None),
+    response_format: str = Form("json"),
+    temperature: float = Form(0.0),
+    timestamp_granularities: Optional[List[str]] = Form(None),
+    api_key: str = Depends(verify_api_key),
+    db: Session = Depends(get_db)
+):
+    """
+    Transcribe audio file (OpenAI compatible endpoint)
+
+    - **file**: Audio file (mp3, mp4, mpeg, mpga, m4a, wav, webm)
+    - **model**: Model ID (whisper-1 or large-v3); accepted but not used by
+      this handler, usage is logged with the configured model
+    - **language**: Language code (e.g., 'de', 'en')
+    - **prompt**, **temperature**: accepted but not used by this handler
+    - **response_format**: json (default), text, verbose_json; any other
+      value (including srt and vtt) falls back to the json shape
+    - **timestamp_granularities**: word, segment (for verbose_json)
+
+    Responds with 400 for an unsupported file extension and with 500 for
+    any other failure; both outcomes are recorded through log_usage.
+    """
+    # Function-scope imports so this block does not depend on a change to
+    # the module's import section.
+    import uuid
+
+    from fastapi.responses import PlainTextResponse
+
+    start_time = time.time()
+    temp_path = None
+    file_size = None  # stays None until the upload has been fully written
+
+    try:
+        # Validate file type. The filename can be absent on a malformed
+        # upload, so fall back to "" instead of crashing in splitext().
+        allowed_extensions = {'.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm'}
+        file_ext = os.path.splitext(file.filename or "")[1].lower()
+
+        if file_ext not in allowed_extensions:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Unsupported file format: {file_ext}"
+            )
+
+        # Never put the client-supplied filename into the path: it may
+        # contain separators or ".." and escape the uploads directory.
+        # A random id also keeps concurrent uploads from overwriting (and
+        # later deleting) each other's temp file, and the key is hashed so
+        # no raw key material ends up in a filename.
+        key_tag = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
+        temp_filename = f"{key_tag}_{uuid.uuid4().hex}{file_ext}"
+        temp_path = os.path.join(settings.uploads_path, temp_filename)
+
+        # Copy the upload to disk in chunks rather than holding the whole
+        # file in memory at once.
+        bytes_written = 0
+        with open(temp_path, "wb") as f:
+            while True:
+                chunk = await file.read(1024 * 1024)
+                if not chunk:
+                    break
+                f.write(chunk)
+                bytes_written += len(chunk)
+
+        file_size = bytes_written
+
+        # Transcribe. bool() so the service receives True/False rather
+        # than None or an empty list.
+        include_word_timestamps = bool(
+            timestamp_granularities and "word" in timestamp_granularities
+        )
+
+        result = await transcribe_audio(
+            audio_path=temp_path,
+            language=language,
+            include_word_timestamps=include_word_timestamps
+        )
+
+        processing_time = int((time.time() - start_time) * 1000)
+
+        # Log usage
+        await log_usage(
+            db=db,
+            api_key=api_key,
+            endpoint="/v1/audio/transcriptions",
+            file_size=file_size,
+            duration=result.get("duration"),
+            processing_time=processing_time,
+            model=settings.whisper_model,
+            status="success"
+        )
+
+        # Format response based on requested format
+        if response_format == "text":
+            # Returning a bare str would be JSON-encoded (wrapped in
+            # quotes); the text format is a plain-text body.
+            return PlainTextResponse(result["text"])
+        if response_format == "verbose_json":
+            return result
+        return {"text": result["text"]}
+
+    except Exception as e:
+        processing_time = int((time.time() - start_time) * 1000)
+
+        # Log error
+        await log_usage(
+            db=db,
+            api_key=api_key,
+            endpoint="/v1/audio/transcriptions",
+            file_size=file_size,
+            duration=None,
+            processing_time=processing_time,
+            model=settings.whisper_model,
+            status="error",
+            error_message=str(e)
+        )
+
+        # Deliberate HTTP errors (e.g. the 400 above) keep their status
+        # code instead of being rewrapped as a 500.
+        if isinstance(e, HTTPException):
+            raise
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    finally:
+        # Cleanup temp file (best effort: a failed delete must not mask
+        # the response or the original error).
+        if temp_path and os.path.exists(temp_path):
+            try:
+                os.remove(temp_path)
+            except OSError:
+                pass