# - Added FastAPI router for serving email management UI at /emails
# - Created Jinja2 template for the email frontend
# - Developed SimpleEmailClassifier for keyword-based email classification
# - Documented email UI implementation details, features, and API integration
#   in EMAIL_UI_IMPLEMENTATION.md
#
# 110 lines, 3.8 KiB, Python
"""
Simple Keyword-Based Email Classifier

Fallback when AI classification is unavailable.
"""

import logging
import re
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


class SimpleEmailClassifier:
    """Rule-based email classifier that scores categories by keyword hits.

    Every entry in ``keyword_rules`` is evaluated with ``re.search``, so plain
    keywords match as case-insensitive substrings and regular-expression
    patterns such as ``cc[0-9]{4}`` are supported as well.
    """

    def __init__(self) -> None:
        """Set up the built-in Danish/English keyword table."""
        # Maps category name -> regex patterns searched for in the email text.
        # Declaration order matters: when two categories have the same number
        # of matches, the one declared first wins (see ``classify``).
        self.keyword_rules: Dict[str, List[str]] = {
            'invoice': [
                'faktura', 'invoice', 'kreditnota', 'credit note',
                'ordrenr', 'order number', 'betalingspåmindelse', 'payment reminder',
                'fakturanr', 'invoice number', 'betaling', 'payment',
            ],
            'freight_note': [
                'fragtbrev', 'tracking', 'forsendelse', 'shipment',
                'levering', 'delivery', 'pakke', 'package', 'fragtbreve',
            ],
            'order_confirmation': [
                'ordrebekræftelse', 'order confirmation', 'bestilling bekræftet',
                'ordre modtaget', 'order received',
            ],
            'time_confirmation': [
                'timer', 'hours', 'tidsforbrug', 'time spent',
                'tidsregistrering', 'time registration',
            ],
            'case_notification': [
                'cc[0-9]{4}', 'case #', 'sag ', 'ticket', 'support',
            ],
            'bankruptcy': [
                'konkurs', 'bankruptcy', 'rekonstruktion', 'insolvency',
                'betalingsstandsning', 'administration',
            ],
            'spam': [
                'unsubscribe', 'click here', 'free offer', 'gratis tilbud',
                'vind nu', 'win now', 'limited time',
            ],
        }

    def classify(self, email_data: Dict[str, Any]) -> Dict[str, Any]:
        """Classify an email by counting keyword matches per category.

        Args:
            email_data: Mapping read through the optional keys ``subject``,
                ``sender_email`` and ``body_text``. Missing or ``None``
                values are treated as empty strings.

        Returns:
            A dict with three keys:

            * ``classification`` - the category with the most matching
              keywords, or ``'general'`` when nothing matched.
            * ``confidence`` - ``0.5`` for ``'general'``; otherwise 0.7 for
              one match, 0.8 for two and 0.9 for three or more.
            * ``reasoning`` - human-readable summary listing at most the
              first three matched keywords.
        """
        subject = (email_data.get('subject', '') or '').lower()
        sender = (email_data.get('sender_email', '') or '').lower()
        # Only the first 500 characters of the (lower-cased) body are scanned.
        body = (email_data.get('body_text', '') or '').lower()[:500]

        # Lazy %-style arguments: the message is only formatted when the
        # record is actually emitted.
        logger.info(
            "🔍 simple_classifier: subject='%s', body_len=%d, sender='%s'",
            subject, len(body), sender,
        )

        # The sender is only logged; matching runs on subject and body.
        text = f"{subject} {body}"

        # Collect, per category, the patterns that occur in the text.
        hits_by_category: Dict[str, List[str]] = {}
        for category, keywords in self.keyword_rules.items():
            hits = [kw for kw in keywords if re.search(kw, text, re.IGNORECASE)]
            if hits:
                hits_by_category[category] = hits

        if not hits_by_category:
            return {
                'classification': 'general',
                'confidence': 0.5,
                'reasoning': 'No specific keywords matched - classified as general',
            }

        # max() returns the first maximal item, so ties are resolved in favour
        # of the category that appears first in ``keyword_rules``.
        category_name, matched_keywords = max(
            hits_by_category.items(), key=lambda item: len(item[1])
        )
        match_count = len(matched_keywords)

        # At least one keyword matched here, so the value ranges from 0.7
        # (one match) up to the 0.9 cap (three or more matches).
        confidence = min(0.9, 0.6 + (match_count * 0.1))

        reasoning = f"Matched {match_count} keyword(s): {', '.join(matched_keywords[:3])}"

        logger.info(
            "✅ Keyword classification: %s (confidence: %.2f)",
            category_name, confidence,
        )

        return {
            'classification': category_name,
            'confidence': confidence,
            'reasoning': reasoning,
        }


# Global instance, created once when this module is imported.
simple_classifier = SimpleEmailClassifier()