- Added `transcription_service.py` to handle audio transcription via Whisper API. - Integrated logging for transcription processes and error handling. - Supported audio format checks based on configuration settings. docs: Create Ordre System Implementation Plan - Drafted comprehensive implementation plan for e-conomic order integration. - Outlined business requirements, database changes, backend and frontend implementation details. - Included testing plan and deployment steps for the new order system. feat: Add AI prompts and regex action capabilities - Created `ai_prompts` table for storing custom AI prompts. - Added regex extraction and linking action to email workflow actions. feat: Introduce conversations module for transcribed audio - Created `conversations` table to store transcribed conversations with relevant metadata. - Added indexing for customer, ticket, and user linkage. - Implemented full-text search capabilities for Danish language. fix: Add category column to conversations for classification - Added `category` column to `conversations` table for better conversation classification.
81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
"""
|
|
Transcription Service
|
|
Handles communication with the external Whisper API for audio transcription.
|
|
"""
|
|
|
|
import logging
|
|
import aiohttp
|
|
import asyncio
|
|
from typing import Optional, Dict, Any, List
|
|
from pathlib import Path
|
|
import json
|
|
|
|
from app.core.config import settings
|
|
|
|
logger = logging.getLogger(__name__)


class TranscriptionService:
    """Service for transcribing audio files via an external Whisper API.

    Configuration is read once from ``settings`` when the service is built:

    * ``api_url`` - endpoint the audio is POSTed to.
    * ``enabled`` - when falsy, every transcription request is skipped.
    * ``timeout`` - total request timeout in seconds.
    * ``supported_formats`` - collection that the lower-cased file suffix
      (as produced by ``Path.suffix``, i.e. with its leading dot) is
      checked against.
    """

    def __init__(self):
        self.api_url = settings.WHISPER_API_URL
        self.enabled = settings.WHISPER_ENABLED
        self.timeout = settings.WHISPER_TIMEOUT
        self.supported_formats = settings.WHISPER_SUPPORTED_FORMATS

    @staticmethod
    def _extract_transcript(payload: Any) -> Optional[str]:
        """Pull the first transcript out of a decoded Whisper API response.

        Expected format: {"results": [{"filename": "...", "transcript": "..."}]}

        Args:
            payload: The decoded JSON body returned by the API.

        Returns:
            The stripped transcript of the first result (possibly an empty
            string), or None when the payload does not match the expected
            shape. Never raises.
        """
        if not isinstance(payload, dict):
            return None

        results = payload.get('results')
        if not isinstance(results, list) or not results:
            return None

        first = results[0]
        if not isinstance(first, dict):
            return None

        transcript = first.get('transcript')
        # A missing or null transcript is a malformed response, not an
        # empty transcription, so it must not be reported as a success.
        if not isinstance(transcript, str):
            return None

        return transcript.strip()

    async def transcribe_audio(self, filename: str, content: bytes) -> Optional[str]:
        """
        Send audio content to Whisper API and return the transcript.

        The call is best-effort: every failure (service disabled, unsupported
        extension, empty content, non-200 response, timeout, transport error,
        malformed response) is logged and reported as None instead of raising.

        Args:
            filename: Name of the file (used for format detection/logging)
            content: Raw bytes of the audio file

        Returns:
            Transcribed text or None if failed
        """
        if not self.enabled:
            logger.debug("Whisper transcription is disabled in settings")
            return None

        # Basic extension check
        ext = Path(filename).suffix.lower()
        if ext not in self.supported_formats:
            logger.debug(f"Skipping transcription for unsupported format: {filename}")
            return None

        # Nothing to transcribe: avoid a pointless round-trip to the API.
        if not content:
            logger.warning(f"⚠️ Skipping transcription for empty audio file: {filename}")
            return None

        logger.info(f"🎙️ Transcribing audio file: {filename} ({len(content)} bytes)")

        # Only the network round-trip lives inside the try block; parsing the
        # decoded payload happens afterwards and cannot raise.
        try:
            # Prepare the form data
            # API expects: file=@filename
            data = aiohttp.FormData()
            data.add_field('file', content, filename=filename)

            timeout = aiohttp.ClientTimeout(total=self.timeout)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_url, data=data) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"❌ Whisper API error ({response.status}): {error_text}")
                        return None

                    result = await response.json()

        except asyncio.TimeoutError:
            logger.error(f"❌ Whisper API timed out after {self.timeout} seconds for {filename}")
            return None
        except Exception as e:
            # Deliberate catch-all boundary: callers rely on None instead of
            # an exception. logger.exception keeps the traceback, which the
            # message alone loses when str(e) is empty.
            logger.exception(f"❌ Error during transcription of {filename}: {str(e)}")
            return None

        transcript = self._extract_transcript(result)
        if transcript is None:
            logger.warning(f"⚠️ Whisper API returned unexpected format: {result}")
            return None

        logger.info(f"✅ Transcription successful for {filename}")
        return transcript
|