bmc_hub/app/services/transcription_service.py
Christian eacbd36e83 feat: Implement Transcription Service for audio files using Whisper API
- Added `transcription_service.py` to handle audio transcription via Whisper API.
- Integrated logging for transcription processes and error handling.
- Supported audio format checks based on configuration settings.

docs: Create Ordre System Implementation Plan

- Drafted comprehensive implementation plan for e-conomic order integration.
- Outlined business requirements, database changes, backend and frontend implementation details.
- Included testing plan and deployment steps for the new order system.

feat: Add AI prompts and regex action capabilities

- Created `ai_prompts` table for storing custom AI prompts.
- Added regex extraction and linking action to email workflow actions.

feat: Introduce conversations module for transcribed audio

- Created `conversations` table to store transcribed conversations with relevant metadata.
- Added indexing for customer, ticket, and user linkage.
- Implemented full-text search capabilities for Danish language.

fix: Add category column to conversations for classification

- Added `category` column to `conversations` table for better conversation classification.
2026-01-11 19:23:21 +01:00

81 lines
3.0 KiB
Python

"""
Transcription Service
Handles communication with the external Whisper API for audio transcription.
"""
import logging
import aiohttp
import asyncio
from typing import Optional, Dict, Any, List
from pathlib import Path
import json
from app.core.config import settings
logger = logging.getLogger(__name__)


class TranscriptionService:
    """Service for transcribing audio files via an external Whisper API.

    Configuration is read from ``settings`` when the service is constructed:
    the endpoint URL, the enabled flag, the request timeout (used as the
    total timeout of the HTTP request, in seconds) and the collection of
    supported audio formats.
    """

    def __init__(self):
        self.api_url = settings.WHISPER_API_URL
        self.enabled = settings.WHISPER_ENABLED
        self.timeout = settings.WHISPER_TIMEOUT
        self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS

    def _normalized_formats(self) -> set:
        """Return the configured formats as lowercase, dot-prefixed suffixes.

        Accepts either an iterable of entries or a single comma/whitespace
        separated string, so ``"mp3"``, ``".MP3"`` and ``".mp3, .wav"`` all
        compare equal to what ``Path(...).suffix.lower()`` produces.
        """
        configured = self.supported_formats or ()
        if isinstance(configured, str):
            # Iterating a plain string would yield single characters.
            configured = configured.replace(",", " ").split()

        normalized = set()
        for entry in configured:
            suffix = str(entry).strip().lower()
            if not suffix:
                continue
            normalized.add(suffix if suffix.startswith(".") else f".{suffix}")
        return normalized

    def _is_supported_format(self, filename: str) -> bool:
        """Return True when the extension of *filename* is a configured format."""
        # A missing filename is treated as "no extension", i.e. unsupported.
        ext = Path(filename or "").suffix.lower()
        return ext in self._normalized_formats()

    @staticmethod
    def _extract_transcript(payload: Any) -> Optional[str]:
        """Pull the first transcript out of a decoded Whisper API response.

        Expected shape: ``{"results": [{"filename": "...", "transcript": "..."}]}``

        Returns:
            The stripped transcript text (an empty string when the first
            result carries no ``transcript`` key), or None when the payload
            does not have the expected shape.
        """
        if not isinstance(payload, dict):
            return None
        results = payload.get('results')
        if not isinstance(results, (list, tuple)) or not results:
            return None
        first = results[0]
        if not isinstance(first, dict):
            return None
        transcript = first.get('transcript', '')
        if not isinstance(transcript, str):
            # e.g. JSON null; calling .strip() on it would raise.
            return None
        return transcript.strip()

    async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:
        """
        Send audio content to the Whisper API and return the transcript.

        Args:
            filename: Name of the file (used for format detection/logging)
            content: Raw bytes of the audio file

        Returns:
            Transcribed text, or None if transcription is disabled, the file
            format is not supported, the content is empty, or the request
            failed / returned an unexpected payload. This method logs
            failures instead of raising them.
        """
        if not self.enabled:
            logger.debug("Whisper transcription is disabled in settings")
            return None

        # Basic extension check
        if not self._is_supported_format(filename):
            logger.debug(f"Skipping transcription for unsupported format: {filename}")
            return None

        # Nothing to transcribe; avoid a pointless round trip to the API.
        if not content:
            logger.warning(f"⚠️ Skipping transcription for empty audio file: {filename}")
            return None

        logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")

        try:
            # Prepare the form data
            # API expects: file=@filename
            form = aiohttp.FormData()
            form.add_field('file', content, filename=filename)

            timeout = aiohttp.ClientTimeout(total=self.timeout)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_url, data=form) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"❌ Whisper API error ({response.status}): {error_text}")
                        return None
                    result = await response.json()
        except asyncio.TimeoutError:
            logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")
            return None
        except Exception as e:
            # Boundary handler: callers rely on None instead of an exception.
            # exc_info keeps the traceback, since str(e) is empty for some
            # exception types.
            logger.error(f"❌ Error during transcription of {filename}: {str(e)}", exc_info=True)
            return None

        transcript = self._extract_transcript(result)
        if transcript is None:
            logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")
            return None

        logger.info(f"✅ Transcription successful for {filename}")
        return transcript