Preserve email body in auto-created cases and backfill missing content

This commit is contained in:
Christian 2026-04-03 00:50:34 +02:00
parent 267f7e716c
commit fb2243f0d4
2 changed files with 87 additions and 2 deletions

View File

@ -12,6 +12,7 @@ import json
import hashlib
import shutil
import io
import html
from pathlib import Path
from decimal import Decimal
from uuid import uuid4
@ -965,13 +966,45 @@ class EmailWorkflowService:
return "\n".join(cleaned_lines).strip()
def _html_to_text(self, body_html: str) -> str:
"""Convert HTML email bodies to readable plain text fallback."""
if not body_html:
return ""
text = str(body_html)
text = re.sub(r'<(style|script)[^>]*>.*?</\1>', '', text, flags=re.IGNORECASE | re.DOTALL)
text = re.sub(r'<\s*br\s*/?>', '\n', text, flags=re.IGNORECASE)
text = re.sub(r'</\s*(p|div|li|tr|h[1-6])\s*>', '\n', text, flags=re.IGNORECASE)
text = re.sub(r'<[^>]+>', ' ', text)
text = html.unescape(text)
text = text.replace('\r\n', '\n').replace('\r', '\n')
text = re.sub(r'\n{3,}', '\n\n', text)
text = re.sub(r'[ \t]{2,}', ' ', text)
return text.strip()
def _extract_primary_email_body(self, email_data: Dict) -> str:
    """Pick the body text to persist on a case or comment for an email.

    The plain-text part (``body_text``) is preferred; the HTML part
    (``body_html``), converted via ``_html_to_text``, is only consulted
    when the plain-text part yields nothing. Either candidate is passed
    through ``_strip_quoted_email_text`` before being accepted.

    Returns:
        The first non-empty cleaned candidate, or ``""`` if neither part
        produces any text.
    """
    plain = (email_data.get('body_text') or '').strip()
    stripped = self._strip_quoted_email_text(plain) if plain else ''
    if stripped:
        return stripped

    # Plain text gave nothing usable: fall back to the HTML part.
    converted = self._html_to_text(email_data.get('body_html') or '')
    if not converted:
        return ""
    return self._strip_quoted_email_text(converted) or ""
def _add_helpdesk_comment(self, sag_id: int, email_data: Dict) -> None:
email_id = email_data.get('id')
sender = email_data.get('sender_email') or 'ukendt'
subject = email_data.get('subject') or '(ingen emne)'
received = email_data.get('received_date')
received_str = received.isoformat() if hasattr(received, 'isoformat') else str(received or '')
body_text = self._strip_quoted_email_text((email_data.get('body_text') or '').strip())
body_text = self._extract_primary_email_body(email_data)
email_meta_line = f"Email-ID: {email_id}\n" if email_id else ""
@ -995,12 +1028,13 @@ class EmailWorkflowService:
def _create_sag_from_email(self, email_data: Dict, customer_id: int) -> Dict[str, Any]:
sender = email_data.get('sender_email') or 'ukendt'
subject = (email_data.get('subject') or '').strip() or f"Email fra {sender}"
body_text = self._extract_primary_email_body(email_data)
description = (
f"Auto-oprettet fra email\n"
f"Fra: {sender}\n"
f"Message-ID: {email_data.get('message_id') or ''}\n\n"
f"{(email_data.get('body_text') or '').strip()}"
f"{body_text}"
)
rows = execute_query(

View File

@ -0,0 +1,51 @@
-- Migration 160: Backfill missing body text in auto-created SAG descriptions
-- Fills existing cases where description only contains metadata and no actual email content.
--
-- How it works:
--   1. candidates: every email linked to an auto-created case whose description
--      is nothing but the "Auto-oprettet fra email / Fra: / Message-ID:" header.
--      The body is taken from body_text, falling back to body_html with
--      <style>/<script> blocks and all remaining tags removed.
--   2. dedup: exactly one recovered body per case.
--   3. UPDATE: append the recovered body to the existing description.
--
-- Re-running is harmless: once a body has been appended, the description no
-- longer matches the "metadata only" filter in step 1.
WITH candidates AS (
    SELECT
        s.id AS sag_id,
        em.id AS email_id,
        COALESCE(
            -- Trim all common whitespace, not just spaces: a body_text that is
            -- only line breaks must count as empty so the HTML fallback is used.
            NULLIF(BTRIM(em.body_text, E' \t\r\n'), ''),
            NULLIF(
                BTRIM(
                    REGEXP_REPLACE(
                        -- "[^>]*?" is deliberately non-greedy. In PostgreSQL a branch
                        -- takes its greediness from its FIRST quantified atom, so a
                        -- greedy "[^>]*" would make the whole pattern match longest
                        -- and swallow everything between the first <style> and the
                        -- last </style>, including real body text.
                        REGEXP_REPLACE(COALESCE(em.body_html, ''), '<(style|script)[^>]*?>.*?</\1>', ' ', 'gis'),
                        '<[^>]+>',
                        ' ',
                        'g'
                    ),
                    E' \t\r\n'
                ),
                ''
            )
        ) AS recovered_body
    FROM sag_sager s
    JOIN sag_emails se ON se.sag_id = s.id
    JOIN email_messages em ON em.id = se.email_id
    WHERE s.beskrivelse LIKE 'Auto-oprettet fra email%'
      -- Keep only descriptions that are empty once the generated header is removed.
      AND COALESCE(
            NULLIF(
                BTRIM(
                    REGEXP_REPLACE(
                        COALESCE(s.beskrivelse, ''),
                        '(?s)^Auto-oprettet fra email\s*\nFra:[^\n]*\nMessage-ID:[^\n]*\n*',
                        ''
                    ),
                    E' \t\r\n'
                ),
                ''
            ),
            ''
          ) = ''
), dedup AS (
    SELECT DISTINCT ON (sag_id)
        sag_id,
        recovered_body
    FROM candidates
    WHERE recovered_body IS NOT NULL
      AND recovered_body <> ''
    -- DISTINCT ON keeps the first row per sag_id in ORDER BY order. The email_id
    -- tie-breaker makes that choice deterministic when a case has several linked
    -- emails. NOTE(review): assumes the lowest email id is the email the case was
    -- created from - confirm against how sag_emails is populated.
    ORDER BY sag_id, email_id
)
UPDATE sag_sager s
SET beskrivelse = CONCAT_WS(E'\n\n', BTRIM(COALESCE(s.beskrivelse, '')), dedup.recovered_body)
FROM dedup
WHERE s.id = dedup.sag_id;