Preserve email body in auto-created cases and backfill missing content

This commit is contained in:
Christian 2026-04-03 00:50:34 +02:00
parent 267f7e716c
commit fb2243f0d4
2 changed files with 87 additions and 2 deletions

View File

@ -12,6 +12,7 @@ import json
import hashlib import hashlib
import shutil import shutil
import io import io
import html
from pathlib import Path from pathlib import Path
from decimal import Decimal from decimal import Decimal
from uuid import uuid4 from uuid import uuid4
@ -965,13 +966,45 @@ class EmailWorkflowService:
return "\n".join(cleaned_lines).strip() return "\n".join(cleaned_lines).strip()
def _html_to_text(self, body_html: str) -> str:
"""Convert HTML email bodies to readable plain text fallback."""
if not body_html:
return ""
text = str(body_html)
text = re.sub(r'<(style|script)[^>]*>.*?</\1>', '', text, flags=re.IGNORECASE | re.DOTALL)
text = re.sub(r'<\s*br\s*/?>', '\n', text, flags=re.IGNORECASE)
text = re.sub(r'</\s*(p|div|li|tr|h[1-6])\s*>', '\n', text, flags=re.IGNORECASE)
text = re.sub(r'<[^>]+>', ' ', text)
text = html.unescape(text)
text = text.replace('\r\n', '\n').replace('\r', '\n')
text = re.sub(r'\n{3,}', '\n\n', text)
text = re.sub(r'[ \t]{2,}', ' ', text)
return text.strip()
def _extract_primary_email_body(self, email_data: Dict) -> str:
    """Pick the body text to persist on a case or comment.

    The stripped ``body_text`` entry is tried first; the plain-text
    rendering of ``body_html`` is only computed if that yields nothing.
    Each non-empty candidate is passed through
    ``_strip_quoted_email_text`` and the first non-empty result is
    returned.

    Args:
        email_data: Mapping read via the ``body_text`` and ``body_html`` keys.

    Returns:
        The first usable cleaned body, or ``""`` when neither source
        produces any text.
    """
    # Lazy producers: the HTML conversion runs only when the plain-text
    # source did not produce a result.
    body_sources = (
        lambda: (email_data.get('body_text') or '').strip(),
        lambda: self._html_to_text(email_data.get('body_html') or ''),
    )

    for produce_body in body_sources:
        candidate = produce_body()
        if not candidate:
            continue
        without_quotes = self._strip_quoted_email_text(candidate)
        if without_quotes:
            return without_quotes

    return ""
def _add_helpdesk_comment(self, sag_id: int, email_data: Dict) -> None: def _add_helpdesk_comment(self, sag_id: int, email_data: Dict) -> None:
email_id = email_data.get('id') email_id = email_data.get('id')
sender = email_data.get('sender_email') or 'ukendt' sender = email_data.get('sender_email') or 'ukendt'
subject = email_data.get('subject') or '(ingen emne)' subject = email_data.get('subject') or '(ingen emne)'
received = email_data.get('received_date') received = email_data.get('received_date')
received_str = received.isoformat() if hasattr(received, 'isoformat') else str(received or '') received_str = received.isoformat() if hasattr(received, 'isoformat') else str(received or '')
body_text = self._strip_quoted_email_text((email_data.get('body_text') or '').strip()) body_text = self._extract_primary_email_body(email_data)
email_meta_line = f"Email-ID: {email_id}\n" if email_id else "" email_meta_line = f"Email-ID: {email_id}\n" if email_id else ""
@ -995,12 +1028,13 @@ class EmailWorkflowService:
def _create_sag_from_email(self, email_data: Dict, customer_id: int) -> Dict[str, Any]: def _create_sag_from_email(self, email_data: Dict, customer_id: int) -> Dict[str, Any]:
sender = email_data.get('sender_email') or 'ukendt' sender = email_data.get('sender_email') or 'ukendt'
subject = (email_data.get('subject') or '').strip() or f"Email fra {sender}" subject = (email_data.get('subject') or '').strip() or f"Email fra {sender}"
body_text = self._extract_primary_email_body(email_data)
description = ( description = (
f"Auto-oprettet fra email\n" f"Auto-oprettet fra email\n"
f"Fra: {sender}\n" f"Fra: {sender}\n"
f"Message-ID: {email_data.get('message_id') or ''}\n\n" f"Message-ID: {email_data.get('message_id') or ''}\n\n"
f"{(email_data.get('body_text') or '').strip()}" f"{body_text}"
) )
rows = execute_query( rows = execute_query(

View File

@ -0,0 +1,51 @@
-- Migration 160: Backfill missing body text in auto-created SAG descriptions
-- Fills existing cases where description only contains metadata and no actual email content.
--
-- How it works:
--   1. candidates: every (case, linked email) pair where the case description
--      consists solely of the auto-generated header
--      ("Auto-oprettet fra email / Fra: ... / Message-ID: ...").
--      recovered_body is the email's body_text, or, when that is blank,
--      body_html with <style>/<script> elements and all remaining tags removed.
--   2. dedup: exactly one recovered body per case.
--   3. UPDATE: append the recovered body to the header.
--
-- Notes:
--   * BTRIM without a character list strips spaces only, so every trim below
--     names tab/CR/LF explicitly. Otherwise a newline-only body_text would
--     count as content and block the HTML fallback.
--   * The HTML fallback is a rough tag strip; HTML entities are not decoded.
--   * The statement is re-runnable: once a body has been appended the
--     description no longer matches the "header only" filter.
WITH candidates AS (
    SELECT
        s.id AS sag_id,
        em.id AS email_id,
        COALESCE(
            NULLIF(BTRIM(em.body_text, E' \t\r\n'), ''),
            NULLIF(
                BTRIM(
                    REGEXP_REPLACE(
                        REGEXP_REPLACE(COALESCE(em.body_html, ''), '<(style|script)[^>]*>.*?</\1>', ' ', 'gis'),
                        '<[^>]+>',
                        ' ',
                        'g'
                    ),
                    E' \t\r\n'
                ),
                ''
            )
        ) AS recovered_body
    FROM sag_sager s
    JOIN sag_emails se ON se.sag_id = s.id
    JOIN email_messages em ON em.id = se.email_id
    -- The LIKE filter also guarantees beskrivelse is non-NULL below.
    WHERE s.beskrivelse LIKE 'Auto-oprettet fra email%'
      -- Nothing but whitespace may remain once the generated header is removed.
      AND BTRIM(
            REGEXP_REPLACE(
                s.beskrivelse,
                '(?s)^Auto-oprettet fra email\s*\nFra:[^\n]*\nMessage-ID:[^\n]*\n*',
                ''
            ),
            E' \t\r\n'
          ) = ''
), dedup AS (
    SELECT DISTINCT ON (sag_id)
        sag_id,
        recovered_body
    FROM candidates
    -- NULLIF above already maps empty bodies to NULL.
    WHERE recovered_body IS NOT NULL
    -- Deterministic choice when several emails are linked to one case:
    -- take the lowest email id.
    -- NOTE(review): presumably the email that created the case - confirm.
    ORDER BY sag_id, email_id
)
UPDATE sag_sager s
-- Trim the header's trailing newlines so the result has exactly one blank
-- line between header and body.
SET beskrivelse = CONCAT_WS(E'\n\n', BTRIM(s.beskrivelse, E' \t\r\n'), dedup.recovered_body)
FROM dedup
WHERE s.id = dedup.sag_id;