81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
|
|
"""
|
||
|
|
Transcription Service
|
||
|
|
Handles communication with the external Whisper API for audio transcription.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import logging
|
||
|
|
import aiohttp
|
||
|
|
import asyncio
|
||
|
|
from typing import Optional, Dict, Any, List
|
||
|
|
from pathlib import Path
|
||
|
|
import json
|
||
|
|
|
||
|
|
from app.core.config import settings
|
||
|
|
|
||
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
class TranscriptionService:
    """Service for transcribing audio files via external Whisper API.

    The endpoint URL, enabled flag, request timeout and supported file
    extensions are copied from ``settings`` when the service is constructed.
    """

    def __init__(self) -> None:
        self.api_url = settings.WHISPER_API_URL
        self.enabled = settings.WHISPER_ENABLED
        self.timeout = settings.WHISPER_TIMEOUT
        self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS

    @staticmethod
    def _extract_transcript(result: Any) -> Optional[str]:
        """Return the first transcript from a decoded Whisper API payload.

        Expected payload shape:
            {"results": [{"filename": "...", "transcript": "..."}]}

        Args:
            result: The JSON-decoded response body (any type).

        Returns:
            The whitespace-stripped transcript of the first result. A first
            result without a ``transcript`` key yields an empty string.
            Returns None when the payload does not match the expected shape
            (not a dict, missing/empty/non-list ``results``, non-dict first
            entry, or a ``transcript`` that is not a string).
        """
        if not isinstance(result, dict):
            return None

        results = result.get('results')
        if not isinstance(results, list) or not results:
            return None

        first = results[0]
        if not isinstance(first, dict):
            return None

        transcript = first.get('transcript', '')
        # A JSON null (or any non-string) cannot be stripped; report it as a
        # malformed payload instead of letting AttributeError escape.
        if not isinstance(transcript, str):
            return None

        return transcript.strip()

    async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:
        """
        Send audio content to Whisper API and return the transcript.

        Failures during the request (disabled service, unsupported extension,
        non-200 response, timeout, transport error, malformed payload) are
        logged and reported as None rather than raised.

        Args:
            filename: Name of the file (used for format detection/logging)
            content: Raw bytes of the audio file

        Returns:
            Transcribed text or None if failed
        """
        if not self.enabled:
            logger.debug("Whisper transcription is disabled in settings")
            return None

        # Basic extension check (suffix includes the leading dot, lowercased)
        ext = Path(filename).suffix.lower()
        if ext not in self.supported_formats:
            logger.debug(f"Skipping transcription for unsupported format: {filename}")
            return None

        logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")

        try:
            # Prepare the form data
            # API expects: file=@filename
            data = aiohttp.FormData()
            data.add_field('file', content, filename=filename)

            timeout = aiohttp.ClientTimeout(total=self.timeout)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_url, data=data) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"❌ Whisper API error ({response.status}): {error_text}")
                        return None

                    result = await response.json()

        except asyncio.TimeoutError:
            logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")
            return None
        except Exception as e:
            # Best-effort boundary: never propagate to the caller, but keep the
            # traceback because str(e) is empty for some exceptions.
            logger.exception(f"❌ Error during transcription of {filename}: {str(e)}")
            return None

        # Payload validation happens outside the network try-block so a
        # malformed body is reported as such, not as a generic failure.
        transcript = self._extract_transcript(result)
        if transcript is None:
            logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")
            return None

        logger.info(f"✅ Transcription successful for {filename}")
        return transcript
|