From 7f325b5c32c84dad89ecd32eae09569a5583ddca Mon Sep 17 00:00:00 2001 From: Christian Date: Thu, 11 Dec 2025 12:45:29 +0100 Subject: [PATCH] feat: Implement email management UI with FastAPI and keyword-based classification - Added FastAPI router for serving email management UI at /emails - Created Jinja2 template for the email frontend - Developed SimpleEmailClassifier for keyword-based email classification - Documented email UI implementation details, features, and API integration in EMAIL_UI_IMPLEMENTATION.md --- app/billing/frontend/supplier_invoices.html | 29 + app/emails/backend/router.py | 266 ++- app/emails/frontend/emails.html | 1776 +++++++++++++++++++ app/emails/frontend/views.py | 24 + app/services/email_analysis_service.py | 56 +- app/services/email_processor_service.py | 20 +- app/services/email_service.py | 148 +- app/services/simple_classifier.py | 109 ++ app/shared/frontend/base.html | 5 + docs/EMAIL_UI_IMPLEMENTATION.md | 199 +++ main.py | 2 + 11 files changed, 2568 insertions(+), 66 deletions(-) create mode 100644 app/emails/frontend/emails.html create mode 100644 app/emails/frontend/views.py create mode 100644 app/services/simple_classifier.py create mode 100644 docs/EMAIL_UI_IMPLEMENTATION.md diff --git a/app/billing/frontend/supplier_invoices.html b/app/billing/frontend/supplier_invoices.html index a43bd39..3835d8a 100644 --- a/app/billing/frontend/supplier_invoices.html +++ b/app/billing/frontend/supplier_invoices.html @@ -720,8 +720,37 @@ document.addEventListener('DOMContentLoaded', () => { loadVendors(); setDefaultDates(); loadPendingFilesCount(); // Load count for badge + checkEmailContext(); // Check if coming from email }); +// Check if coming from email context +function checkEmailContext() { + const emailContext = sessionStorage.getItem('supplierInvoiceContext'); + if (emailContext) { + try { + const context = JSON.parse(emailContext); + + // Show notification + showSuccess(`Opret faktura fra email: ${context.subject}`); + + // Pre-fill description field with email subject + const descriptionField = document.getElementById('description'); + if (descriptionField) { + descriptionField.value = `Fra email: ${context.subject}\nAfsender: ${context.sender}`; + } + + // Open create modal if exists + const createModal = new bootstrap.Modal(document.getElementById('invoiceModal')); + createModal.show(); + + // Clear context after use + sessionStorage.removeItem('supplierInvoiceContext'); + } catch (error) { + console.error('Failed to parse email context:', error); + } + } +} + // Set default dates function setDefaultDates() { const today = new Date().toISOString().split('T')[0]; diff --git a/app/emails/backend/router.py b/app/emails/backend/router.py index 11d4f62..c86df7c 100644 --- a/app/emails/backend/router.py +++ b/app/emails/backend/router.py @@ -9,7 +9,7 @@ from typing import List, Optional from pydantic import BaseModel from datetime import datetime, date -from app.core.database import execute_query, execute_insert +from app.core.database import execute_query, execute_insert, execute_update from app.services.email_processor_service import EmailProcessorService logger = logging.getLogger(__name__) @@ -36,6 +36,16 @@ class EmailListItem(BaseModel): customer_name: Optional[str] = None +class EmailAttachment(BaseModel): + id: int + email_id: int + filename: str + content_type: Optional[str] + size_bytes: Optional[int] + file_path: Optional[str] + created_at: datetime + + class EmailDetail(BaseModel): id: int message_id: str @@ -64,6 +74,7 @@ class EmailDetail(BaseModel): auto_processed: bool created_at: datetime updated_at: datetime + attachments: List[EmailAttachment] = [] class EmailRule(BaseModel): @@ -146,15 +157,24 @@ async def get_email(email_id: int): WHERE id = %s AND deleted_at IS NULL """ result = execute_query(query, (email_id,)) + logger.info(f"🔍 Query result type: {type(result)}, length: {len(result) if result else 0}") if not result: raise HTTPException(status_code=404, detail="Email not found") + # Store email before update + email_data = result[0] + + # Get attachments + att_query = "SELECT * FROM email_attachments WHERE email_id = %s ORDER BY id" + attachments = execute_query(att_query, (email_id,)) + email_data['attachments'] = attachments or [] + # Mark as read update_query = "UPDATE email_messages SET is_read = true WHERE id = %s" - execute_query(update_query, (email_id,)) + execute_update(update_query, (email_id,)) - return result[0] + return email_data except HTTPException: raise @@ -163,6 +183,136 @@ async def get_email(email_id: int): raise HTTPException(status_code=500, detail=str(e)) +@router.get("/emails/{email_id}/attachments/{attachment_id}") +async def download_attachment(email_id: int, attachment_id: int): + """Download email attachment""" + from fastapi.responses import FileResponse + import os + + try: + query = """ + SELECT a.* FROM email_attachments a + JOIN email_messages e ON e.id = a.email_id + WHERE a.id = %s AND a.email_id = %s AND e.deleted_at IS NULL + """ + result = execute_query(query, (attachment_id, email_id)) + + if not result: + raise HTTPException(status_code=404, detail="Attachment not found") + + attachment = result[0] + file_path = attachment['file_path'] + + if not os.path.exists(file_path): + raise HTTPException(status_code=404, detail="File not found on disk") + + return FileResponse( + path=file_path, + filename=attachment['filename'], + media_type=attachment.get('content_type', 'application/octet-stream') + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Error downloading attachment {attachment_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.put("/emails/{email_id}") +async def update_email(email_id: int, status: Optional[str] = None): + """Update email (archive, mark as read, etc)""" + try: + # Build update fields dynamically + updates = [] + params = [] + + if status: + updates.append("status = %s") + params.append(status) + + if not updates: + raise HTTPException(status_code=400, detail="No fields to update") + + params.append(email_id) + query = f"UPDATE email_messages SET {', '.join(updates)}, updated_at = CURRENT_TIMESTAMP WHERE id = %s" + execute_update(query, tuple(params)) + + logger.info(f"✅ Updated email {email_id}: status={status}") + return {"success": True, "message": "Email updated"} + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Error updating email {email_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.delete("/emails/{email_id}") +async def delete_email(email_id: int): + """Soft delete email""" + try: + query = """ + UPDATE email_messages + SET deleted_at = CURRENT_TIMESTAMP + WHERE id = %s AND deleted_at IS NULL + """ + execute_update(query, (email_id,)) + + logger.info(f"🗑️ Deleted email {email_id}") + return {"success": True, "message": "Email deleted"} + + except Exception as e: + logger.error(f"❌ Error deleting email {email_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/emails/{email_id}/reprocess") +async def reprocess_email(email_id: int): + """Reprocess email (re-classify and apply rules)""" + try: + # Get email + query = "SELECT * FROM email_messages WHERE id = %s AND deleted_at IS NULL" + result = execute_query(query, (email_id,)) + + if not result: + raise HTTPException(status_code=404, detail="Email not found") + + email = result[0] + + # Re-classify + processor = EmailProcessorService() + classification, confidence = await processor.classify_email( + email['subject'], + email['body_text'] or email['body_html'] + ) + + # Update classification + update_query = """ + UPDATE email_messages + SET classification = %s, + confidence_score = %s, + classification_date = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + WHERE id = %s + """ + execute_update(update_query, (classification, confidence, email_id)) + + logger.info(f"🔄 Reprocessed email {email_id}: {classification} ({confidence:.2f})") + return { + "success": True, + "message": "Email reprocessed", + "classification": classification, + "confidence": confidence + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Error reprocessing email {email_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + @router.post("/emails/process") async def process_emails(): """Manually trigger email processing""" @@ -181,25 +331,101 @@ async def process_emails(): raise HTTPException(status_code=500, detail=str(e)) -@router.post("/emails/{email_id}/reprocess") -async def reprocess_email(email_id: int): - """Manually reprocess a single email (reclassify + rematch rules)""" +@router.post("/emails/bulk/archive") +async def bulk_archive(email_ids: List[int]): + """Bulk archive emails""" try: - processor = EmailProcessorService() - await processor.reprocess_email(email_id) + if not email_ids: + raise HTTPException(status_code=400, detail="No email IDs provided") - return { - "success": True, - "message": f"Email {email_id} reprocessed successfully" - } + placeholders = ','.join(['%s'] * len(email_ids)) + query = f""" + UPDATE email_messages + SET status = 'archived', updated_at = CURRENT_TIMESTAMP + WHERE id IN ({placeholders}) AND deleted_at IS NULL + """ + execute_update(query, tuple(email_ids)) + + logger.info(f"📦 Bulk archived {len(email_ids)} emails") + return {"success": True, "message": f"{len(email_ids)} emails archived"} except Exception as e: - logger.error(f"❌ Error reprocessing email {email_id}: {e}") + logger.error(f"❌ Error bulk archiving: {e}") raise HTTPException(status_code=500, detail=str(e)) +@router.post("/emails/bulk/reprocess") +async def bulk_reprocess(email_ids: List[int]): + """Bulk reprocess emails""" + try: + if not email_ids: + raise HTTPException(status_code=400, detail="No email IDs provided") + + processor = EmailProcessorService() + success_count = 0 + + for email_id in email_ids: + try: + # Get email + query = "SELECT * FROM email_messages WHERE id = %s AND deleted_at IS NULL" + result = execute_query(query, (email_id,)) + + if result: + email = result[0] + classification, confidence = await processor.classify_email( + email['subject'], + email['body_text'] or email['body_html'] + ) + + update_query = """ + UPDATE email_messages + SET classification = %s, confidence_score = %s, + classification_date = CURRENT_TIMESTAMP + WHERE id = %s + """ + execute_update(update_query, (classification, confidence, email_id)) + success_count += 1 + except Exception as e: + logger.error(f"Failed to reprocess email {email_id}: {e}") + + logger.info(f"🔄 Bulk reprocessed {success_count}/{len(email_ids)} emails") + return {"success": True, "message": f"{success_count} emails reprocessed"} + + except Exception as e: + logger.error(f"❌ Error bulk reprocessing: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/emails/bulk/delete") +async def bulk_delete(email_ids: List[int]): + """Bulk soft delete emails""" + try: + if not email_ids: + raise HTTPException(status_code=400, detail="No email IDs provided") + + placeholders = ','.join(['%s'] * len(email_ids)) + query = f""" + UPDATE email_messages + SET deleted_at = CURRENT_TIMESTAMP + WHERE id IN ({placeholders}) AND deleted_at IS NULL + """ + execute_update(query, tuple(email_ids)) + + logger.info(f"🗑️ Bulk deleted {len(email_ids)} emails") + return {"success": True, "message": f"{len(email_ids)} emails deleted"} + + except Exception as e: + logger.error(f"❌ Error bulk deleting: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +class ClassificationUpdate(BaseModel): + classification: str + confidence: Optional[float] = None + + @router.put("/emails/{email_id}/classify") -async def update_classification(email_id: int, classification: str): +async def update_classification(email_id: int, data: ClassificationUpdate): """Manually update email classification""" try: valid_classifications = [ @@ -207,20 +433,24 @@ async def update_classification(email_id: int, classification: str): 'case_notification', 'customer_email', 'bankruptcy', 'general', 'spam', 'unknown' ] - if classification not in valid_classifications: + if data.classification not in valid_classifications: raise HTTPException(status_code=400, detail=f"Invalid classification. Must be one of: {valid_classifications}") + confidence = data.confidence if data.confidence is not None else 1.0 + query = """ UPDATE email_messages - SET classification = %s, + SET classification = %s, + confidence_score = %s, classification_date = CURRENT_TIMESTAMP WHERE id = %s AND deleted_at IS NULL """ - execute_query(query, (classification, email_id)) + execute_update(query, (data.classification, confidence, email_id)) + logger.info(f"✏️ Manual classification: Email {email_id} → {data.classification}") return { "success": True, - "message": f"Email {email_id} classified as '{classification}'" + "message": f"Email {email_id} classified as '{data.classification}'" } except HTTPException: diff --git a/app/emails/frontend/emails.html b/app/emails/frontend/emails.html new file mode 100644 index 0000000..bd54aba --- /dev/null +++ b/app/emails/frontend/emails.html @@ -0,0 +1,1776 @@ +{% extends "shared/frontend/base.html" %} + +{% block title %}Email - BMC Hub{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Email

+

Administrer og klassificer emails automatisk

+
+
+ + +
+
+ + +
+ + 0 valgt +
+ + + + +
+
+ + +
+ + + + + + + + +
+ + +
+ Tryk ? for genveje +
+ + + + + + + + + +{% endblock %} + +{% block extra_js %} + +{% endblock %} diff --git a/app/emails/frontend/views.py b/app/emails/frontend/views.py new file mode 100644 index 0000000..818ed09 --- /dev/null +++ b/app/emails/frontend/views.py @@ -0,0 +1,24 @@ +""" +Email Frontend Views +Serves the email management UI +""" + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates +import logging + +logger = logging.getLogger(__name__) + +router = APIRouter() + +# Setup Jinja2 templates +templates = Jinja2Templates(directory="app") + +@router.get("/emails", response_class=HTMLResponse) +async def emails_page(request: Request): + """Email management UI - 3-column modern email interface""" + return templates.TemplateResponse( + "emails/frontend/emails.html", + {"request": request} + ) diff --git a/app/services/email_analysis_service.py b/app/services/email_analysis_service.py index 456d852..d206cd2 100644 --- a/app/services/email_analysis_service.py +++ b/app/services/email_analysis_service.py @@ -59,40 +59,21 @@ class EmailAnalysisService: def _build_classification_prompt(self) -> str: """Build Danish system prompt for email classification""" - return """Du er en ekspert i at klassificere danske forretningsemails. + return """Classify this Danish business email into ONE category. Return ONLY valid JSON with no explanation. -Din opgave er at analysere emailens indhold og klassificere den i én af følgende kategorier: +Categories: invoice, freight_note, order_confirmation, time_confirmation, case_notification, customer_email, bankruptcy, general, spam, unknown -**Kategorier:** -1. **invoice** - Fakturaer fra leverandører (inkl. kreditnotaer) -2. **freight_note** - Fragtbreve og forsendelsesbekræftelser -3. **order_confirmation** - Ordrebekræftelser fra leverandører -4. **time_confirmation** - Bekræftelser på tidsforbrug/timer (fra kunder eller interne) -5. **case_notification** - Notifikationer om sager, support tickets, opgaver -6. **customer_email** - Generelle kundehenvendelser (spørgsmål, feedback, klager) -7. **bankruptcy** - Konkursmeldinger, rekonstruktion, betalingsstandsning -8. **general** - Almindelig kommunikation (opdateringer, møder, newsletters) -9. **spam** - Spam, reklame, phishing -10. **unknown** - Kan ikke klassificeres med sikkerhed +Rules: +- invoice: Contains invoice number, amount, or payment info +- time_confirmation: Time/hours confirmation, often with case references +- case_notification: Notifications about specific cases (CC0001, Case #123) +- bankruptcy: Explicit bankruptcy/insolvency notice +- Be conservative: Use general or unknown if uncertain -**Vigtige regler:** -- `invoice` skal indeholde fakturanummer, beløb, eller betalingsinformation -- `time_confirmation` indeholder timer/tidsforbrug, ofte med case/sagsreferencer -- `case_notification` er notifikationer om specifikke sager (CC0001, Case #123 osv.) -- `bankruptcy` kun hvis der er EKSPLICIT konkursmelding -- Vær konservativ: Hvis du er i tvivl, brug `general` eller `unknown` +Response format (JSON only, no other text): +{"classification": "invoice", "confidence": 0.95, "reasoning": "Subject contains 'Faktura' and invoice number"} -**Output format (JSON):** -```json -{ - "classification": "invoice", - "confidence": 0.95, - "reasoning": "Emailen indeholder fakturanummer, beløb og betalingsinstruktioner" -} -``` - -Returner KUN JSON - ingen anden tekst. -""" +IMPORTANT: Return ONLY the JSON object. Do not include any explanation, thinking, or additional text.""" def _build_email_context(self, email_data: Dict) -> str: """Build email context for AI analysis""" @@ -130,7 +111,7 @@ Klassificer denne email.""" "stream": False, "options": { "temperature": 0.1, # Low temperature for consistent classification - "num_predict": 200 # Short response expected + "num_predict": 500 # Enough for complete JSON response } } @@ -145,10 +126,19 @@ Klassificer denne email.""" return None data = await response.json() - content = data.get('message', {}).get('content', '') + + message_data = data.get('message', {}) + + # qwen3 model returns 'thinking' field instead of 'content' for reasoning + # Try both fields + content = message_data.get('content', '') or message_data.get('thinking', '') processing_time = (datetime.now() - start_time).total_seconds() * 1000 + if not content: + logger.error(f"❌ Ollama returned empty response. Message keys: {message_data.keys()}") + return None + # Parse JSON response result = self._parse_ollama_response(content) @@ -157,7 +147,7 @@ Klassificer denne email.""" logger.info(f"✅ AI classification: {result['classification']} (confidence: {result['confidence']}, {processing_time:.0f}ms)") return result else: - logger.error(f"❌ Failed to parse Ollama response: {content[:100]}") + logger.error(f"❌ Failed to parse Ollama response. Content length: {len(content)}, First 300 chars: {content[:300]}") return None except asyncio.TimeoutError: diff --git a/app/services/email_processor_service.py b/app/services/email_processor_service.py index ea345c0..70efb4a 100644 --- a/app/services/email_processor_service.py +++ b/app/services/email_processor_service.py @@ -10,8 +10,9 @@ from datetime import datetime from app.services.email_service import EmailService from app.services.email_analysis_service import EmailAnalysisService +from app.services.simple_classifier import simple_classifier from app.core.config import settings -from app.core.database import execute_query +from app.core.database import execute_query, execute_update logger = logging.getLogger(__name__) @@ -25,6 +26,7 @@ class EmailProcessorService: self.enabled = settings.EMAIL_TO_TICKET_ENABLED self.rules_enabled = settings.EMAIL_RULES_ENABLED self.auto_process = settings.EMAIL_RULES_AUTO_PROCESS + self.ai_enabled = settings.EMAIL_AI_ENABLED async def process_inbox(self) -> Dict: """ @@ -93,8 +95,14 @@ class EmailProcessorService: async def _classify_and_update(self, email_data: Dict): """Classify email and update database""" try: - # Run AI classification - result = await self.analysis_service.classify_email(email_data) + logger.info(f"🔍 _classify_and_update: ai_enabled={self.ai_enabled}, EMAIL_AI_ENABLED={settings.EMAIL_AI_ENABLED}") + + # Run classification (AI or simple keyword-based) + if self.ai_enabled: + result = await self.analysis_service.classify_email(email_data) + else: + logger.info(f"🔍 Using simple keyword classifier for email {email_data['id']}") + result = simple_classifier.classify(email_data) classification = result.get('classification', 'unknown') confidence = result.get('confidence', 0.0) @@ -107,7 +115,7 @@ class EmailProcessorService: classification_date = CURRENT_TIMESTAMP WHERE id = %s """ - execute_query(query, (classification, confidence, email_data['id'])) + execute_update(query, (classification, confidence, email_data['id'])) logger.info(f"✅ Classified email {email_data['id']} as '{classification}' (confidence: {confidence:.2f})") @@ -420,8 +428,8 @@ class EmailProcessorService: email_data = result[0] - # Reclassify - if settings.EMAIL_AI_ENABLED: + # Reclassify (either AI or keyword-based) + if settings.EMAIL_AUTO_CLASSIFY: await self._classify_and_update(email_data) # Rematch rules diff --git a/app/services/email_service.py b/app/services/email_service.py index 41039fd..c48f993 100644 --- a/app/services/email_service.py +++ b/app/services/email_service.py @@ -12,6 +12,7 @@ from typing import List, Dict, Optional, Tuple from datetime import datetime import json import asyncio +import base64 from aiohttp import ClientSession, BasicAuth import msal @@ -180,6 +181,19 @@ class EmailService: try: parsed_email = self._parse_graph_message(msg) + # Fetch attachments if email has them + if msg.get('hasAttachments', False): + attachments = await self._fetch_graph_attachments( + user_email, + msg['id'], + access_token, + session + ) + parsed_email['attachments'] = attachments + parsed_email['attachment_count'] = len(attachments) + else: + parsed_email['attachments'] = [] + # Check if already exists if not self._email_exists(parsed_email['message_id']): emails.append(parsed_email) @@ -274,17 +288,35 @@ class EmailService: except Exception: body_text = str(msg.get_payload()) - # Check for attachments - has_attachments = False - attachment_count = 0 + # Extract attachments + attachments = [] if msg.is_multipart(): for part in msg.walk(): if part.get_content_maintype() == 'multipart': continue - if part.get('Content-Disposition') is not None: - has_attachments = True - attachment_count += 1 + + # Skip text parts (body content) + if part.get_content_type() in ['text/plain', 'text/html']: + continue + + # Check if part has a filename (indicates attachment) + filename = part.get_filename() + if filename: + # Decode filename if needed + filename = self._decode_header(filename) + + # Get attachment content + content = part.get_payload(decode=True) + content_type = part.get_content_type() + + if content: # Only add if we got content + attachments.append({ + 'filename': filename, + 'content': content, + 'content_type': content_type, + 'size': len(content) + }) return { 'message_id': message_id, @@ -297,8 +329,9 @@ class EmailService: 'body_html': body_html, 'received_date': received_date, 'folder': self.imap_config['folder'], - 'has_attachments': has_attachments, - 'attachment_count': attachment_count + 'has_attachments': len(attachments) > 0, + 'attachment_count': len(attachments), + 'attachments': attachments } def _parse_graph_message(self, msg: Dict) -> Dict: @@ -341,9 +374,58 @@ class EmailService: 'received_date': received_date, 'folder': self.imap_config['folder'], 'has_attachments': msg.get('hasAttachments', False), - 'attachment_count': 0 # TODO: Fetch attachment count from Graph API if needed + 'attachment_count': 0 # Will be updated after fetching attachments } + async def _fetch_graph_attachments( + self, + user_email: str, + message_id: str, + access_token: str, + session: ClientSession + ) -> List[Dict]: + """Fetch attachments for a specific message from Graph API""" + attachments = [] + + try: + # Graph API endpoint for message attachments + url = f"https://graph.microsoft.com/v1.0/users/{user_email}/messages/{message_id}/attachments" + headers = { + 'Authorization': f'Bearer {access_token}', + 'Content-Type': 'application/json' + } + + async with session.get(url, headers=headers) as response: + if response.status != 200: + logger.warning(f"⚠️ Failed to fetch attachments for message {message_id}: {response.status}") + return [] + + data = await response.json() + attachment_list = data.get('value', []) + + for att in attachment_list: + # Graph API returns base64 content in contentBytes + content_bytes = att.get('contentBytes', '') + if content_bytes: + import base64 + content = base64.b64decode(content_bytes) + else: + content = b'' + + attachments.append({ + 'filename': att.get('name', 'unknown'), + 'content': content, + 'content_type': att.get('contentType', 'application/octet-stream'), + 'size': att.get('size', len(content)) + }) + + logger.info(f"📎 Fetched attachment: {att.get('name')} ({att.get('size', 0)} bytes)") + + except Exception as e: + logger.error(f"❌ Error fetching attachments for message {message_id}: {e}") + + return attachments + def _decode_header(self, header: str) -> str: """Decode email header (handles MIME encoding)""" if not header: @@ -425,12 +507,60 @@ class EmailService: )) logger.info(f"✅ Saved email {email_id}: {email_data['subject'][:50]}...") + + # Save attachments if any + if email_data.get('attachments'): + await self._save_attachments(email_id, email_data['attachments']) + return email_id except Exception as e: logger.error(f"❌ Error saving email to database: {e}") return None + async def _save_attachments(self, email_id: int, attachments: List[Dict]): + """Save email attachments to disk and database""" + import os + import hashlib + from pathlib import Path + + # Create uploads directory if not exists + upload_dir = Path("uploads/email_attachments") + upload_dir.mkdir(parents=True, exist_ok=True) + + for att in attachments: + try: + filename = att['filename'] + content = att['content'] + content_type = att.get('content_type', 'application/octet-stream') + size_bytes = att['size'] + + # Generate MD5 hash for deduplication + md5_hash = hashlib.md5(content).hexdigest() + + # Save to disk with hash prefix + file_path = upload_dir / f"{md5_hash}_{filename}" + file_path.write_bytes(content) + + # Save to database + query = """ + INSERT INTO email_attachments + (email_id, filename, content_type, size_bytes, file_path) + VALUES (%s, %s, %s, %s, %s) + """ + execute_insert(query, ( + email_id, + filename, + content_type, + size_bytes, + str(file_path) + )) + + logger.info(f"📎 Saved attachment: {filename} ({size_bytes} bytes)") + + except Exception as e: + logger.error(f"❌ Failed to save attachment {filename}: {e}") + async def get_unprocessed_emails(self, limit: int = 100) -> List[Dict]: """Get emails from database that haven't been processed yet""" query = """ diff --git a/app/services/simple_classifier.py b/app/services/simple_classifier.py new file mode 100644 index 0000000..a81c9b6 --- /dev/null +++ b/app/services/simple_classifier.py @@ -0,0 +1,109 @@ +""" +Simple Keyword-Based Email Classifier +Fallback when AI classification is unavailable +""" + +import logging +from typing import Dict, Optional +import re + +logger = logging.getLogger(__name__) + + +class SimpleEmailClassifier: + """Simple rule-based email classifier using keywords""" + + def __init__(self): + self.keyword_rules = { + 'invoice': [ + 'faktura', 'invoice', 'kreditnota', 'credit note', + 'ordrenr', 'order number', 'betalingspåmindelse', 'payment reminder', + 'fakturanr', 'invoice number', 'betaling', 'payment' + ], + 'freight_note': [ + 'fragtbrev', 'tracking', 'forsendelse', 'shipment', + 'levering', 'delivery', 'pakke', 'package', 'fragtbreve' + ], + 'order_confirmation': [ + 'ordrebekræftelse', 'order confirmation', 'bestilling bekræftet', + 'ordre modtaget', 'order received' + ], + 'time_confirmation': [ + 'timer', 'hours', 'tidsforbrug', 'time spent', + 'tidsregistrering', 'time registration' + ], + 'case_notification': [ + 'cc[0-9]{4}', 'case #', 'sag ', 'ticket', 'support' + ], + 'bankruptcy': [ + 'konkurs', 'bankruptcy', 'rekonstruktion', 'insolvency', + 'betalingsstandsning', 'administration' + ], + 'spam': [ + 'unsubscribe', 'click here', 'free offer', 'gratis tilbud', + 'vind nu', 'win now', 'limited time' + ] + } + + def classify(self, email_data: Dict) -> Dict: + """ + Classify email using simple keyword matching + Returns: {classification: str, confidence: float, reasoning: str} + """ + subject = (email_data.get('subject', '') or '').lower() + sender = (email_data.get('sender_email', '') or '').lower() + body = (email_data.get('body_text', '') or '').lower()[:500] # First 500 chars + + logger.info(f"🔍 simple_classifier: subject='{subject}', body_len={len(body)}, sender='{sender}'") + + # Combine all text for analysis + text = f"{subject} {body}" + + # Check each category + scores = {} + for category, keywords in self.keyword_rules.items(): + matches = 0 + matched_keywords = [] + + for keyword in keywords: + # Use regex for patterns like CC[0-9]{4} + if re.search(keyword, text, re.IGNORECASE): + matches += 1 + matched_keywords.append(keyword) + + if matches > 0: + scores[category] = { + 'matches': matches, + 'keywords': matched_keywords + } + + # Determine best match + if not scores: + return { + 'classification': 'general', + 'confidence': 0.5, + 'reasoning': 'No specific keywords matched - classified as general' + } + + # Get category with most matches + best_category = max(scores.items(), key=lambda x: x[1]['matches']) + category_name = best_category[0] + match_count = best_category[1]['matches'] + matched_keywords = best_category[1]['keywords'] + + # Calculate confidence (0.6-0.9 based on matches) + confidence = min(0.9, 0.6 + (match_count * 0.1)) + + reasoning = f"Matched {match_count} keyword(s): {', '.join(matched_keywords[:3])}" + + logger.info(f"✅ Keyword classification: {category_name} (confidence: {confidence:.2f})") + + return { + 'classification': category_name, + 'confidence': confidence, + 'reasoning': reasoning + } + + +# Global instance +simple_classifier = SimpleEmailClassifier() diff --git a/app/shared/frontend/base.html b/app/shared/frontend/base.html index c47df0a..b2e52a8 100644 --- a/app/shared/frontend/base.html +++ b/app/shared/frontend/base.html @@ -212,6 +212,11 @@
  • Rapporter
  • +