# bmc_hub/app/services/transcription_service.py

"""
Transcription Service
Handles communication with the external Whisper API for audio transcription.
"""

import logging
import aiohttp
import asyncio
from typing import Optional, Dict, Any, List
from pathlib import Path
import json

from app.core.config import settings

logger = logging.getLogger(__name__)

class TranscriptionService:
    """Service for transcribing audio files via external Whisper API.

    Configuration is read from ``settings`` once, at construction time.
    Failures while talking to the API (non-200 status, timeout, transport or
    decoding errors, unexpected response shape) are logged and reported to
    the caller as ``None``.
    """

    def __init__(self):
        # Endpoint that receives the multipart upload.
        self.api_url = settings.WHISPER_API_URL
        # Master switch: when falsy, transcribe_audio() returns None at once.
        self.enabled = settings.WHISPER_ENABLED
        # Total request budget in seconds (handed to aiohttp.ClientTimeout).
        self.timeout = settings.WHISPER_TIMEOUT
        # Accepted file suffixes. They are compared against the lowercased
        # suffix of the filename *including* its leading dot (e.g. ".mp3").
        self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS

    @staticmethod
    def _extract_transcript(payload: Any) -> Optional[str]:
        """Pull the first transcript out of a decoded Whisper API response.

        Expected shape: {"results": [{"filename": "...", "transcript": "..."}]}

        Args:
            payload: The decoded JSON body (may be any JSON value).

        Returns:
            The whitespace-stripped transcript of the first result (an empty
            string when the first result has no "transcript" key), or None
            when the payload does not have the expected shape.
        """
        if not isinstance(payload, dict):
            return None

        results = payload.get('results')
        if not isinstance(results, list) or not results:
            return None

        first = results[0]
        if not isinstance(first, dict):
            return None

        transcript = first.get('transcript', '')
        # A null / non-string transcript is a malformed response, not text.
        if not isinstance(transcript, str):
            return None

        return transcript.strip()

    async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:
        """
        Send audio content to Whisper API and return the transcript.

        Args:
            filename: Name of the file (used for format detection/logging)
            content: Raw bytes of the audio file

        Returns:
            Transcribed text or None if transcription is disabled, the file
            extension is unsupported, or the request/response failed
        """
        if not self.enabled:
            logger.debug("Whisper transcription is disabled in settings")
            return None

        # Basic extension check
        ext = Path(filename).suffix.lower()
        if ext not in self.supported_formats:
            logger.debug(f"Skipping transcription for unsupported format: {filename}")
            return None

        logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")

        # Only the network round-trip lives inside the try block; parsing the
        # decoded payload is done afterwards by a helper that cannot raise.
        try:
            # Prepare the form data
            # API expects: file=@filename
            data = aiohttp.FormData()
            data.add_field('file', content, filename=filename)

            timeout = aiohttp.ClientTimeout(total=self.timeout)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_url, data=data) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"❌ Whisper API error ({response.status}): {error_text}")
                        return None

                    # content_type=None disables aiohttp's strict
                    # "application/json" header check, so a valid JSON body
                    # served with another media type is still accepted. A
                    # body that is not JSON still fails here and is handled
                    # by the generic handler below.
                    result = await response.json(content_type=None)

        except asyncio.TimeoutError:
            logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")
            return None
        except Exception as e:
            # logger.exception keeps the traceback; several transport errors
            # have an empty str(), which made the old message uninformative.
            logger.exception(f"❌ Error during transcription of {filename}: {e}")
            return None

        transcript = self._extract_transcript(result)
        if transcript is None:
            logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")
            return None

        logger.info(f"✅ Transcription successful for {filename}")
        return transcript