bmc_hub/app/services/transcription_service.py

"""
Transcription Service
Handles communication with the external Whisper API for audio transcription.
"""

import logging
import aiohttp
import asyncio
from typing import Optional, Dict, Any, List
from pathlib import Path
import json

from app.core.config import settings

logger = logging.getLogger(__name__)

class TranscriptionService:
    """Service for transcribing audio files via an external Whisper API.

    Configuration is read once, at construction time, from ``settings``:
    ``WHISPER_API_URL``, ``WHISPER_ENABLED``, ``WHISPER_TIMEOUT`` and
    ``WHISPER_SUPPORTED_FORMATS``.
    """

    def __init__(self):
        self.api_url = settings.WHISPER_API_URL
        self.enabled = settings.WHISPER_ENABLED
        # Used as the *total* request timeout (see aiohttp.ClientTimeout below).
        self.timeout = settings.WHISPER_TIMEOUT
        # Compared against the lower-cased file suffix, which includes the
        # leading dot (e.g. ".mp3").
        self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS

    @staticmethod
    def _extract_transcript(result: Any) -> Optional[str]:
        """Pull the first transcript out of a decoded Whisper API response.

        Expected format: {"results": [{"filename": "...", "transcript": "..."}]}

        Args:
            result: The decoded JSON body returned by the API.

        Returns:
            The whitespace-stripped transcript of the first result (an empty
            string when the "transcript" key is absent), or None when the
            payload does not have the expected shape (not a dict, missing or
            empty "results", non-dict entry, non-string transcript).
        """
        if not isinstance(result, dict):
            return None

        results = result.get('results')
        if not isinstance(results, list) or not results:
            return None

        first = results[0]
        if not isinstance(first, dict):
            return None

        transcript = first.get('transcript', '')
        # A JSON null (or any other non-string) cannot be stripped; treat it
        # as a malformed response rather than letting it raise.
        if not isinstance(transcript, str):
            return None

        return transcript.strip()

    async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:
        """
        Send audio content to Whisper API and return the transcript.

        Args:
            filename: Name of the file (used for format detection/logging)
            content: Raw bytes of the audio file

        Returns:
            Transcribed text, or None if transcription is disabled, the file
            extension is not supported, the content is empty, or the request
            failed / returned an unexpected payload. Network and API errors
            are logged and never propagated to the caller.
        """
        if not self.enabled:
            logger.debug("Whisper transcription is disabled in settings")
            return None

        # Basic extension check
        ext = Path(filename).suffix.lower()
        if ext not in self.supported_formats:
            logger.debug(f"Skipping transcription for unsupported format: {filename}")
            return None

        # Nothing to transcribe: avoid a pointless API round trip (and a
        # TypeError from len() below if a caller passes None).
        if not content:
            logger.warning(f"⚠️ Skipping transcription for empty audio file: {filename}")
            return None

        logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")

        try:
            # Prepare the form data
            # API expects: file=@filename
            data = aiohttp.FormData()
            data.add_field('file', content, filename=filename)

            timeout = aiohttp.ClientTimeout(total=self.timeout)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_url, data=data) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"❌ Whisper API error ({response.status}): {error_text}")
                        return None

                    result = await response.json()

        except asyncio.TimeoutError:
            logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")
            return None
        except Exception as e:
            # Best-effort service: never raise to the caller, but keep the
            # exception type and traceback in the log for diagnosis.
            logger.error(
                f"❌ Error during transcription of {filename}: {str(e)}",
                exc_info=True,
            )
            return None

        transcript = self._extract_transcript(result)
        if transcript is None:
            logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")
            return None

        logger.info(f"✅ Transcription successful for {filename}")
        return transcript
feat: Implement Transcription Service for audio files using Whisper API - Added `transcription_service.py` to handle audio transcription via Whisper API. - Integrated logging for transcription processes and error handling. - Supported audio format checks based on configuration settings. docs: Create Ordre System Implementation Plan - Drafted comprehensive implementation plan for e-conomic order integration. - Outlined business requirements, database changes, backend and frontend implementation details. - Included testing plan and deployment steps for the new order system. feat: Add AI prompts and regex action capabilities - Created `ai_prompts` table for storing custom AI prompts. - Added regex extraction and linking action to email workflow actions. feat: Introduce conversations module for transcribed audio - Created `conversations` table to store transcribed conversations with relevant metadata. - Added indexing for customer, ticket, and user linkage. - Implemented full-text search capabilities for Danish language. fix: Add category column to conversations for classification - Added `category` column to `conversations` table for better conversation classification. 2026-01-11 19:23:21 +01:00			`"""`
			`Transcription Service`
			`Handles communication with the external Whisper API for audio transcription.`
			`"""`

			`import logging`
			`import aiohttp`
			`import asyncio`
			`from typing import Optional, Dict, Any, List`
			`from pathlib import Path`
			`import json`

			`from app.core.config import settings`

			`logger = logging.getLogger(__name__)`

			`class TranscriptionService:`
			`"""Service for transcribing audio files via external Whisper API"""`

			`def __init__(self):`
			`self.api_url = settings.WHISPER_API_URL`
			`self.enabled = settings.WHISPER_ENABLED`
			`self.timeout = settings.WHISPER_TIMEOUT`
			`self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS`

			`async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:`
			`"""`
			`Send audio content to Whisper API and return the transcript.`

			`Args:`
			`filename: Name of the file (used for format detection/logging)`
			`content: Raw bytes of the audio file`

			`Returns:`
			`Transcribed text or None if failed`
			`"""`
			`if not self.enabled:`
			`logger.debug("Whisper transcription is disabled in settings")`
			`return None`

			`# Basic extension check`
			`ext = Path(filename).suffix.lower()`
			`if ext not in self.supported_formats:`
			`logger.debug(f"Skipping transcription for unsupported format: {filename}")`
			`return None`

			`logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")`

			`try:`
			`# Prepare the form data`
			`# API expects: file=@filename`
			`data = aiohttp.FormData()`
			`data.add_field('file', content, filename=filename)`

			`timeout = aiohttp.ClientTimeout(total=self.timeout)`

			`async with aiohttp.ClientSession(timeout=timeout) as session:`
			`async with session.post(self.api_url, data=data) as response:`
			`if response.status != 200:`
			`error_text = await response.text()`
			`logger.error(f"❌ Whisper API error ({response.status}): {error_text}")`
			`return None`

			`result = await response.json()`

			`# Expected format: {"results": [{"filename": "...", "transcript": "..."}]}`
			`if 'results' in result and len(result['results']) > 0:`
			`transcript = result['results'][0].get('transcript', '').strip()`
			`logger.info(f"✅ Transcription successful for {filename}")`
			`return transcript`
			`else:`
			`logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")`
			`return None`

			`except asyncio.TimeoutError:`
			`logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")`
			`return None`
			`except Exception as e:`
			`logger.error(f"❌ Error during transcription of {filename}: {str(e)}")`
			`return None`