From fb2243f0d48c705e0255be64f96cc857e07f6e56 Mon Sep 17 00:00:00 2001
From: Christian
Date: Fri, 3 Apr 2026 00:50:34 +0200
Subject: [PATCH] Preserve email body in auto-created cases and backfill missing content

Add EmailWorkflowService._html_to_text and _extract_primary_email_body so
that helpdesk comments and auto-created case descriptions fall back to a
plain-text rendering of body_html when body_text is empty (or is empty
after quoted-text stripping).

Migration 160 appends the recovered body to existing auto-created cases
whose description only holds the "Auto-oprettet fra email" metadata header.
---
 app/services/email_workflow_service.py        | 38 +++++++++++++-
 .../160_backfill_autocreated_case_body.sql    | 51 +++++++++++++++++++
 2 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 migrations/160_backfill_autocreated_case_body.sql

diff --git a/app/services/email_workflow_service.py b/app/services/email_workflow_service.py
index 8e419c6..b583da4 100644
--- a/app/services/email_workflow_service.py
+++ b/app/services/email_workflow_service.py
@@ -12,6 +12,7 @@ import json
 import hashlib
 import shutil
 import io
+import html
 from pathlib import Path
 from decimal import Decimal
 from uuid import uuid4
@@ -965,13 +966,45 @@ class EmailWorkflowService:
 
         return "\n".join(cleaned_lines).strip()
 
+    def _html_to_text(self, body_html: str) -> str:
+        """Convert an HTML email body to readable plain text (fallback when no text part exists)."""
+        if not body_html:
+            return ""
+
+        text = str(body_html)
+        text = re.sub(r'<(style|script)\b[^>]*>.*?</\1\s*>', '', text, flags=re.IGNORECASE | re.DOTALL)  # drop the element together with its CSS/JS payload
+        text = re.sub(r'<\s*br\s*/?>', '\n', text, flags=re.IGNORECASE)
+        text = re.sub(r'</\s*(?:p|div|li|tr|h[1-6])\s*>', '\n', text, flags=re.IGNORECASE)  # block-level closers end a line
+        text = re.sub(r'<[^>]+>', ' ', text)
+        text = html.unescape(text)
+        text = text.replace('\r\n', '\n').replace('\r', '\n')
+        text = re.sub(r'\n{3,}', '\n\n', text)
+        text = re.sub(r'[ \t]{2,}', ' ', text)
+        return text.strip()
+
+    def _extract_primary_email_body(self, email_data: Dict) -> str:
+        """Return best-effort email body text for case/comment persistence."""
+        raw_text = (email_data.get('body_text') or '').strip()
+        if raw_text:
+            cleaned = self._strip_quoted_email_text(raw_text)
+            if cleaned:
+                return cleaned
+
+        html_fallback = self._html_to_text(email_data.get('body_html') or '')
+        if html_fallback:
+            cleaned = self._strip_quoted_email_text(html_fallback)
+            if cleaned:
+                return cleaned
+
+        return ""
+
     def _add_helpdesk_comment(self, sag_id: int, email_data: Dict) -> None:
         email_id = email_data.get('id')
         sender = email_data.get('sender_email') or 'ukendt'
         subject = email_data.get('subject') or '(ingen emne)'
         received = email_data.get('received_date')
         received_str = received.isoformat() if hasattr(received, 'isoformat') else str(received or '')
-        body_text = self._strip_quoted_email_text((email_data.get('body_text') or '').strip())
+        body_text = self._extract_primary_email_body(email_data)
 
         email_meta_line = f"Email-ID: {email_id}\n" if email_id else ""
 
@@ -995,12 +1028,13 @@ class EmailWorkflowService:
     def _create_sag_from_email(self, email_data: Dict, customer_id: int) -> Dict[str, Any]:
         sender = email_data.get('sender_email') or 'ukendt'
         subject = (email_data.get('subject') or '').strip() or f"Email fra {sender}"
+        body_text = self._extract_primary_email_body(email_data)
 
         description = (
             f"Auto-oprettet fra email\n"
             f"Fra: {sender}\n"
             f"Message-ID: {email_data.get('message_id') or ''}\n\n"
-            f"{(email_data.get('body_text') or '').strip()}"
+            f"{body_text}"
         )
 
         rows = execute_query(
diff --git a/migrations/160_backfill_autocreated_case_body.sql b/migrations/160_backfill_autocreated_case_body.sql
new file mode 100644
index 0000000..59d5bb9
--- /dev/null
+++ b/migrations/160_backfill_autocreated_case_body.sql
@@ -0,0 +1,51 @@
+-- Migration 160: Backfill missing body text in auto-created SAG descriptions
+-- Fills existing cases whose description contains only the metadata header and no actual email content.
+
+WITH candidates AS (
+    SELECT
+        s.id AS sag_id,
+        se.email_id,  -- tie-breaker so dedup picks the same email on every run
+        COALESCE(
+            NULLIF(BTRIM(em.body_text), ''),
+            NULLIF(
+                BTRIM(
+                    REGEXP_REPLACE(
+                        REGEXP_REPLACE(COALESCE(em.body_html, ''), '<\s*?(style|script)[^>]*>.*?</\1\s*>', ' ', 'gis'),  -- leading non-greedy quantifier makes the whole ARE non-greedy, so each match stops at the first closing tag
+                        '<[^>]+>',
+                        ' ',
+                        'g'
+                    )
+                ),
+                ''
+            )
+        ) AS recovered_body
+    FROM sag_sager s
+    JOIN sag_emails se ON se.sag_id = s.id
+    JOIN email_messages em ON em.id = se.email_id
+    WHERE s.beskrivelse LIKE 'Auto-oprettet fra email%'
+      AND COALESCE(
+            NULLIF(
+                BTRIM(
+                    REGEXP_REPLACE(
+                        COALESCE(s.beskrivelse, ''),
+                        '(?s)^Auto-oprettet fra email\s*\nFra:[^\n]*\nMessage-ID:[^\n]*\n*',
+                        ''
+                    )
+                ),
+                ''
+            ),
+            ''
+          ) = ''
+), dedup AS (
+    SELECT DISTINCT ON (sag_id)
+        sag_id,
+        recovered_body
+    FROM candidates
+    WHERE recovered_body IS NOT NULL
+      AND recovered_body <> ''
+    ORDER BY sag_id, email_id  -- lowest linked email id with a body wins; presumably the email that opened the case (confirm)
+)
+UPDATE sag_sager s
+SET beskrivelse = CONCAT_WS(E'\n\n', BTRIM(COALESCE(s.beskrivelse, '')), dedup.recovered_body)
+FROM dedup
+WHERE s.id = dedup.sag_id;