# bmc_hub/app/services/transcription_service.py

"""
Transcription Service
Handles communication with the external Whisper API for audio transcription.
"""
import logging
import aiohttp
import asyncio
from typing import Optional, Dict, Any, List
from pathlib import Path
import json
from app.core.config import settings
logger = logging.getLogger(__name__)


class TranscriptionService:
    """Service for transcribing audio files via an external Whisper API.

    Configuration is read once from ``settings`` when the service is
    constructed: ``WHISPER_API_URL``, ``WHISPER_ENABLED``,
    ``WHISPER_TIMEOUT`` and ``WHISPER_SUPPORTED_FORMATS``.
    """

    def __init__(self):
        self.api_url = settings.WHISPER_API_URL
        self.enabled = settings.WHISPER_ENABLED
        self.timeout = settings.WHISPER_TIMEOUT
        self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS

    def _is_supported_format(self, filename: str) -> bool:
        """Return True if the extension of *filename* is a configured format.

        The comparison ignores case and a leading dot on either side, so
        ``".mp3"``, ``"mp3"`` and ``".MP3"`` are all treated as the same
        format. ``supported_formats`` may be any iterable of strings or a
        single comma-separated string; ``None`` means nothing is supported.
        """
        ext = Path(filename).suffix.lower().lstrip(".")
        if not ext:
            # No extension: never a match. (A plain ``in`` test against a
            # string-valued setting would wrongly accept the empty suffix.)
            return False

        formats = self.supported_formats or ()
        if isinstance(formats, str):
            formats = formats.split(",")
        return any(
            str(fmt).strip().lower().lstrip(".") == ext for fmt in formats
        )

    @staticmethod
    def _extract_transcript(result: Any) -> Optional[str]:
        """Return the first transcript from a decoded API response.

        Expected format: {"results": [{"filename": "...", "transcript": "..."}]}

        Returns the whitespace-stripped transcript (an empty string when the
        first entry has no ``transcript`` key), or None when the payload does
        not have the expected shape or the transcript is not a string.
        """
        if not isinstance(result, dict):
            return None
        results = result.get('results')
        if not isinstance(results, list) or not results:
            return None
        first = results[0]
        if not isinstance(first, dict):
            return None
        transcript = first.get('transcript', '')
        if not isinstance(transcript, str):
            return None
        return transcript.strip()

    async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:
        """
        Send audio content to Whisper API and return the transcript.

        Args:
            filename: Name of the file (used for format detection/logging)
            content: Raw bytes of the audio file

        Returns:
            Transcribed text, or None if transcription is disabled, the
            format is unsupported, the content is empty, or the request
            fails, times out, or returns an unexpected payload.
        """
        if not self.enabled:
            logger.debug("Whisper transcription is disabled in settings")
            return None

        # Basic extension check
        if not self._is_supported_format(filename):
            logger.debug(f"Skipping transcription for unsupported format: {filename}")
            return None

        # Guard before len(content) below: empty/missing content cannot be
        # transcribed, and None would otherwise raise outside the try block.
        if not content:
            logger.warning(f"⚠️ Skipping transcription for empty audio file: {filename}")
            return None

        logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")

        try:
            # Prepare the form data
            # API expects: file=@filename
            data = aiohttp.FormData()
            data.add_field('file', content, filename=filename)

            timeout = aiohttp.ClientTimeout(total=self.timeout)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_url, data=data) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"❌ Whisper API error ({response.status}): {error_text}")
                        return None
                    result = await response.json()
        except asyncio.TimeoutError:
            logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")
            return None
        except Exception as e:
            # Best-effort boundary: never propagate to the caller, but keep
            # the traceback because str(e) is empty for some exceptions.
            logger.error(
                f"❌ Error during transcription of {filename}: {str(e)}",
                exc_info=True,
            )
            return None

        transcript = self._extract_transcript(result)
        if transcript is None:
            logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")
            return None

        logger.info(f"✅ Transcription successful for {filename}")
        return transcript