"""
Email Management Router
API endpoints for email viewing, classification, and rule management
"""
import logging
from fastapi import APIRouter, HTTPException, Query, UploadFile, File
from typing import List, Optional, Dict
from pydantic import BaseModel
from datetime import datetime, date
from app.core.database import execute_query, execute_insert, execute_update, execute_query_single
from app.services.email_processor_service import EmailProcessorService
from app.services.email_workflow_service import email_workflow_service
from app.services.ollama_service import ollama_service
logger = logging.getLogger(__name__)
router = APIRouter()
# Pydantic Models
class EmailListItem(BaseModel):
    """Summary of one email as returned by ``GET /emails``.

    The fields mirror the columns and aliases selected in ``list_emails``.
    """
    id: int
    message_id: str
    subject: str
    sender_email: str
    sender_name: Optional[str]
    received_date: datetime
    classification: Optional[str]
    confidence_score: Optional[float]
    status: str
    is_read: bool
    has_attachments: bool
    attachment_count: int
    # Display names filled from the LEFT JOINs in ``list_emails``; None when
    # the email has no matching rule / vendor / customer row.
    rule_name: Optional[str] = None
    supplier_name: Optional[str] = None
    customer_name: Optional[str] = None
class EmailAttachment(BaseModel):
    """Metadata for one stored attachment; embedded in ``EmailDetail.attachments``."""
    id: int
    email_id: int
    filename: str
    content_type: Optional[str]
    size_bytes: Optional[int]
    # Location of the stored file; ``download_attachment`` reads this value
    # and checks that it exists on disk before serving it.
    file_path: Optional[str]
    created_at: datetime
class EmailDetail(BaseModel):
    """Full email record as returned by ``GET /emails/{email_id}``.

    ``get_email`` builds it from ``email_messages.*`` plus the joined
    customer/vendor names and the rows from ``email_attachments``.
    """
    id: int
    message_id: str
    subject: str
    sender_email: str
    sender_name: Optional[str]
    recipient_email: Optional[str]
    cc: Optional[str]
    body_text: Optional[str]
    body_html: Optional[str]
    received_date: datetime
    folder: str
    classification: Optional[str]
    confidence_score: Optional[float]
    status: str
    is_read: bool
    has_attachments: bool
    attachment_count: int
    # Foreign keys to the matched rule, vendor, customer and SAG case.
    rule_id: Optional[int]
    supplier_id: Optional[int]
    customer_id: Optional[int]
    linked_case_id: Optional[int]
    # Invoice data extracted from the email (None when nothing was extracted).
    extracted_invoice_number: Optional[str]
    extracted_amount: Optional[float]
    extracted_due_date: Optional[date]
    auto_processed: bool
    created_at: datetime
    updated_at: datetime
    # Added by ``get_email`` after the main query; empty list when none exist.
    attachments: List[EmailAttachment] = []
    # Joined display names; None when the email is not linked.
    customer_name: Optional[str] = None
    supplier_name: Optional[str] = None
class EmailRule(BaseModel):
    """An email rule: match conditions plus the action to run on a match.

    NOTE(review): the endpoints using this model are not in this part of the
    file; presumably it serves as both request and response schema - confirm.
    """
    id: Optional[int] = None
    name: str
    # NOTE(review): ``description`` and ``last_matched_at`` are Optional but
    # have no default. Under Pydantic v1 they default to None; under v2 they
    # are required-but-nullable - confirm which behaviour is intended.
    description: Optional[str]
    conditions: dict
    action_type: str
    action_params: Optional[dict] = {}
    priority: int = 100
    enabled: bool = True
    match_count: int = 0
    last_matched_at: Optional[datetime]
class EmailWorkflow(BaseModel):
    """A workflow definition: trigger criteria, ordered steps and run counters.

    NOTE(review): the endpoints using this model are not in this part of the
    file; field meanings below are read from the names only - confirm.
    """
    id: Optional[int] = None
    name: str
    description: Optional[str]
    # Trigger criteria: a classification plus optional sender/subject patterns.
    classification_trigger: str
    sender_pattern: Optional[str] = None
    subject_pattern: Optional[str] = None
    confidence_threshold: float = 0.70
    workflow_steps: List[dict]
    priority: int = 100
    enabled: bool = True
    stop_on_match: bool = True
    # Execution counters and timestamp; all start at zero / None.
    execution_count: int = 0
    success_count: int = 0
    failure_count: int = 0
    last_executed_at: Optional[datetime] = None
class WorkflowExecution(BaseModel):
    """Record of one workflow run against one email, with progress and timing."""
    id: int
    workflow_id: int
    email_id: int
    status: str
    steps_completed: int
    steps_total: Optional[int]
    result_json: Optional[List[dict]] = None  # Can be list of step results
    error_message: Optional[str]
    started_at: datetime
    completed_at: Optional[datetime]
    execution_time_ms: Optional[int]
class WorkflowAction(BaseModel):
    """Catalogue entry describing one action that a workflow step can use."""
    id: int
    action_code: str
    name: str
    description: Optional[str]
    category: Optional[str]
    # Free-form dicts describing the action's parameters and a sample config.
    parameter_schema: Optional[dict]
    example_config: Optional[dict]
    enabled: bool
class ProcessingStats(BaseModel):
    """Counters summarising one email processing run; every counter defaults to 0."""
    status: str
    fetched: int = 0
    saved: int = 0
    classified: int = 0
    awaiting_user_action: int = 0
    rules_matched: int = 0
    errors: int = 0
class CreateSagFromEmailRequest(BaseModel):
    """Request body for ``POST /emails/{email_id}/create-sag``."""
    # Falls back to the email subject, then to a generated "E-mail fra ..." title.
    titel: Optional[str] = None
    # Falls back to the email's text body, then HTML body, then an empty string.
    beskrivelse: Optional[str] = None
    # Falls back to the customer already linked on the email; the endpoint
    # answers 400 when neither is set.
    customer_id: Optional[int] = None
    # When set, the contact is attached to the new case with role 'primary'.
    contact_id: Optional[int] = None
    # Stored as ``template_key`` after lower-casing and truncation to 50 chars.
    case_type: str = "support"
    # Appended to the generated system note (truncated to 60 chars).
    secondary_label: Optional[str] = None
    start_date: Optional[date] = None
    # Must be one of low / normal / high / urgent; None is treated as 'normal'.
    priority: Optional[str] = None
    ansvarlig_bruger_id: Optional[int] = None
    assigned_group_id: Optional[int] = None
    created_by_user_id: int = 1
    # A system comment is only written for 'kommentar', 'intern_note' and
    # 'kundeopdatering'.
    relation_type: str = "kommentar"
class LinkEmailToSagRequest(BaseModel):
    """Request body for ``POST /emails/{email_id}/link-sag``."""
    # Must refer to an existing, non-deleted case; otherwise the endpoint answers 404.
    sag_id: int
    # A comment is only written for 'kommentar', 'intern_note' and 'kundeopdatering'.
    relation_type: str = "kommentar"
    # Overrides the generated comment text when provided.
    note: Optional[str] = None
    # Stored as the author of the comment.
    forfatter: str = "E-mail Motor"
    # True: also set status/folder to processed; False: only link the case.
    mark_processed: bool = True
@router.get("/emails/sag-options")
async def get_sag_assignment_options():
    """List active users and all groups for the SAG assignment pickers.

    Returns:
        dict: ``{"users": [...], "groups": [...]}``; a list is empty when its
        query yields nothing.

    Raises:
        HTTPException: 500 carrying the underlying error text if a query fails.
    """
    users_sql = """
        SELECT user_id AS id,
        COALESCE(full_name, username, CONCAT('Bruger #', user_id::text)) AS name
        FROM users
        WHERE is_active = true
        ORDER BY COALESCE(full_name, username, user_id::text)
        """
    groups_sql = """
        SELECT id, name
        FROM groups
        ORDER BY name
        """
    try:
        active_users = execute_query(users_sql)
        all_groups = execute_query(groups_sql)
    except Exception as e:
        logger.error("❌ Error loading SAG assignment options: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return {"users": active_users or [], "groups": all_groups or []}
@router.get("/emails/search-customers")
async def search_customers(q: str = Query(..., min_length=1), limit: int = Query(20, ge=1, le=100)):
    """Customer autocomplete for the email-to-case flow.

    Matches the trimmed search term as a case-insensitive substring of the
    customer's name, email, email domain or CVR number.

    Returns:
        list: up to ``limit`` rows with ``id``, ``name`` and ``email_domain``,
        ordered by name; empty when nothing matches.

    Raises:
        HTTPException: 500 carrying the underlying error text if the query fails.
    """
    pattern = f"%{q.strip()}%"
    sql = """
        SELECT id, name, email_domain
        FROM customers
        WHERE (
        name ILIKE %s
        OR COALESCE(email, '') ILIKE %s
        OR COALESCE(email_domain, '') ILIKE %s
        OR COALESCE(cvr_number, '') ILIKE %s
        )
        ORDER BY name
        LIMIT %s
        """
    try:
        # One placeholder per searched column, then the row limit.
        matches = execute_query(sql, (pattern,) * 4 + (limit,))
    except Exception as e:
        logger.error("❌ Error searching customers from email router: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return matches or []
@router.get("/emails/search-sager")
async def search_sager(q: str = Query(..., min_length=1), limit: int = Query(20, ge=1, le=100)):
    """SAG case autocomplete used when linking an email to an existing case.

    Matches the trimmed search term as a case-insensitive substring of the
    case title, description or numeric id. Soft-deleted cases are excluded.

    Returns:
        list: up to ``limit`` rows (id, titel, status, priority,
        customer_name), most recently updated first; empty when nothing matches.

    Raises:
        HTTPException: 500 carrying the underlying error text if the query fails.
    """
    pattern = f"%{q.strip()}%"
    sql = """
        SELECT s.id, s.titel, s.status, s.priority, c.name AS customer_name
        FROM sag_sager s
        LEFT JOIN customers c ON c.id = s.customer_id
        WHERE s.deleted_at IS NULL
        AND (
        s.titel ILIKE %s
        OR COALESCE(s.beskrivelse, '') ILIKE %s
        OR CAST(s.id AS TEXT) ILIKE %s
        )
        ORDER BY s.updated_at DESC
        LIMIT %s
        """
    try:
        # One placeholder per searched column, then the row limit.
        matches = execute_query(sql, (pattern,) * 3 + (limit,))
    except Exception as e:
        logger.error("❌ Error searching sager from email router: %s", e)
        raise HTTPException(status_code=500, detail=str(e))
    return matches or []
# Email Endpoints
@router.get("/emails", response_model=List[EmailListItem])
async def list_emails(
    status: Optional[str] = Query(None),
    classification: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    limit: int = Query(50, ge=0, le=500),
    offset: int = Query(0, ge=0)
):
    """Get list of emails with filtering.

    Args:
        status: Exact match on the email status, when given.
        classification: Exact match on the classification, when given.
        q: Case-insensitive substring searched in subject, sender email and
            sender name, when given.
        limit: Page size, 0-500. The lower bound is validated here so that a
            negative value is rejected as a client error instead of failing
            inside the SQL ``LIMIT`` clause.
        offset: Number of rows to skip (>= 0).

    Returns:
        list: matching non-deleted emails, newest ``received_date`` first;
        an empty list when nothing matches.

    Raises:
        HTTPException: 500 carrying the underlying error text if the query fails.
    """
    try:
        where_clauses = ["em.deleted_at IS NULL"]
        params = []
        if status:
            where_clauses.append("em.status = %s")
            params.append(status)
        if classification:
            where_clauses.append("em.classification = %s")
            params.append(classification)
        if q:
            where_clauses.append("(em.subject ILIKE %s OR em.sender_email ILIKE %s OR em.sender_name ILIKE %s)")
            search_term = f"%{q}%"
            params.extend([search_term, search_term, search_term])
        # Only fixed SQL fragments are interpolated; all values are bound.
        where_sql = " AND ".join(where_clauses)
        # NOTE(review): body_text/body_html are selected but are not fields of
        # EmailListItem - confirm whether they can be dropped from the list query.
        query = f"""
            SELECT
            em.id, em.message_id, em.subject, em.sender_email, em.sender_name,
            em.received_date, em.classification, em.confidence_score, em.status,
            em.is_read, em.has_attachments, em.attachment_count,
            em.body_text, em.body_html,
            er.name as rule_name,
            v.name as supplier_name,
            c.name as customer_name
            FROM email_messages em
            LEFT JOIN email_rules er ON em.rule_id = er.id
            LEFT JOIN vendors v ON em.supplier_id = v.id
            LEFT JOIN customers c ON em.customer_id = c.id
            WHERE {where_sql}
            ORDER BY em.received_date DESC
            LIMIT %s OFFSET %s
            """
        params.extend([limit, offset])
        result = execute_query(query, tuple(params))
        # Same guard as the sibling search endpoints: always hand the
        # response model a list, even if the query helper returns nothing.
        return result or []
    except Exception as e:
        logger.error(f"❌ Error listing emails: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/emails/{email_id}", response_model=EmailDetail)
async def get_email(email_id: int):
    """Return one email with its attachments and mark it as read.

    The returned record is the row as it was read, i.e. before the
    ``is_read`` flag is updated.

    Raises:
        HTTPException: 404 when no non-deleted email has this id; 500 with
            the underlying error text on any other failure.
    """
    detail_sql = """
        SELECT em.*,
        c.name AS customer_name,
        v.name AS supplier_name
        FROM email_messages em
        LEFT JOIN customers c ON em.customer_id = c.id
        LEFT JOIN vendors v ON em.supplier_id = v.id
        WHERE em.id = %s AND em.deleted_at IS NULL
        """
    try:
        rows = execute_query(detail_sql, (email_id,))
        logger.info(f"🔍 Query result type: {type(rows)}, length: {len(rows) if rows else 0}")
        if not rows:
            raise HTTPException(status_code=404, detail="Email not found")

        # Keep the row as fetched; the read flag is flipped afterwards.
        record = rows[0]
        files = execute_query(
            "SELECT * FROM email_attachments WHERE email_id = %s ORDER BY id",
            (email_id,),
        )
        record['attachments'] = files if files else []

        execute_update("UPDATE email_messages SET is_read = true WHERE id = %s", (email_id,))
        return record
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error getting email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/mark-processed")
async def mark_email_processed(email_id: int):
    """Mark email as processed and move to 'Processed' folder.

    Returns:
        dict: ``success`` plus the ``email_id``, ``folder`` and ``status``
        reported back by the UPDATE ... RETURNING statement.

    Raises:
        HTTPException: 404 when no non-deleted email has this id; 500 with
            the underlying error text on any other failure.
    """
    try:
        # Update email status and folder
        update_query = """
            UPDATE email_messages
            SET status = 'processed',
            folder = 'Processed',
            processed_at = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
            WHERE id = %s AND deleted_at IS NULL
            RETURNING id, folder, status
            """
        result = execute_query(update_query, (email_id,))
        if not result:
            raise HTTPException(status_code=404, detail="Email not found")
        # execute_query yields a list of rows (every other call site indexes
        # it), so read the returned values from the first row instead of
        # calling .get() on the list itself.
        row = result[0]
        logger.info(f"✅ Email {email_id} marked as processed and moved to Processed folder")
        return {
            "success": True,
            "email_id": row.get('id'),
            "folder": row.get('folder'),
            "status": row.get('status')
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error marking email {email_id} as processed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/emails/{email_id}/attachments/{attachment_id}")
async def download_attachment(email_id: int, attachment_id: int):
    """Download email attachment.

    The attachment must belong to the given email, and the email must not be
    soft-deleted.

    Returns:
        FileResponse: the stored file, offered under its original filename.

    Raises:
        HTTPException: 404 when the attachment row does not exist, has no
            stored path, or the file is missing on disk; 500 with the
            underlying error text on any other failure.
    """
    from fastapi.responses import FileResponse
    import os
    try:
        query = """
            SELECT a.* FROM email_attachments a
            JOIN email_messages e ON e.id = a.email_id
            WHERE a.id = %s AND a.email_id = %s AND e.deleted_at IS NULL
            """
        result = execute_query(query, (attachment_id, email_id))
        if not result:
            raise HTTPException(status_code=404, detail="Attachment not found")
        attachment = result[0]
        file_path = attachment['file_path']
        # file_path is nullable; os.path.exists(None) would raise TypeError
        # and surface as a 500, so treat a missing path like a missing file.
        if not file_path or not os.path.exists(file_path):
            raise HTTPException(status_code=404, detail="File not found on disk")
        # NOTE(review): the row always carries a content_type key, so the
        # fallback below only applies if the key is absent; a NULL value is
        # passed through as None - confirm that is the intended behaviour.
        return FileResponse(
            path=file_path,
            filename=attachment['filename'],
            media_type=attachment.get('content_type', 'application/octet-stream')
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error downloading attachment {attachment_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.put("/emails/{email_id}")
async def update_email(email_id: int, status: Optional[str] = None):
    """Update an email's status and refresh its ``updated_at`` timestamp.

    ``status`` is currently the only updatable field.

    Raises:
        HTTPException: 400 when ``status`` is missing or empty; 500 with the
            underlying error text if the update fails.
    """
    try:
        if not status:
            raise HTTPException(status_code=400, detail="No fields to update")

        execute_update(
            "UPDATE email_messages SET status = %s, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
            (status, email_id),
        )
        logger.info(f"✅ Updated email {email_id}: status={status}")
        return {"success": True, "message": "Email updated"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error updating email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/emails/{email_id}/link")
async def link_email(email_id: int, payload: Dict):
    """Set the customer and/or supplier link on an email.

    Only the ``customer_id`` and ``supplier_id`` keys of the payload are
    used; a key that is present is written even when its value is null.

    Raises:
        HTTPException: 400 when neither key is present; 500 with the
            underlying error text if the update fails.
    """
    try:
        # Column names come from this fixed tuple, never from the payload.
        present = [column for column in ('customer_id', 'supplier_id') if column in payload]
        if not present:
            raise HTTPException(status_code=400, detail="Ingen felter at opdatere")

        assignments = ', '.join(f"{column} = %s" for column in present)
        values = tuple(payload[column] for column in present) + (email_id,)
        execute_update(
            f"UPDATE email_messages SET {assignments}, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
            values,
        )
        logger.info(f"✅ Linked email {email_id}: {payload}")
        return {"success": True, "message": "Email linket"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error linking email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/create-sag")
async def create_sag_from_email(email_id: int, payload: CreateSagFromEmailRequest):
    """Create a new SAG from an email and persist the email-case relation.

    Steps, in order: load the email, insert the case, link email and case in
    ``sag_emails``, mark the email as processed, optionally attach a contact,
    and optionally write a system comment on the case.

    Returns:
        dict: ``success``, ``email_id``, the inserted case row under ``sag``
        and a message.

    Raises:
        HTTPException: 404 when the email does not exist or is deleted; 400
            when no customer id is available or the priority is invalid; 500
            when the insert returns no row or any other error occurs.
    """
    try:
        email_row = execute_query(
            "SELECT * FROM email_messages WHERE id = %s AND deleted_at IS NULL",
            (email_id,)
        )
        if not email_row:
            raise HTTPException(status_code=404, detail="Email not found")
        email_data = email_row[0]
        # The payload value wins; otherwise reuse the customer already linked on the email.
        customer_id = payload.customer_id or email_data.get('customer_id')
        if not customer_id:
            raise HTTPException(status_code=400, detail="customer_id is required (missing on email and payload)")
        # Title fallback chain: payload -> email subject -> generated text.
        titel = (payload.titel or email_data.get('subject') or f"E-mail fra {email_data.get('sender_email', 'ukendt afsender')}").strip()
        # Description fallback chain: payload -> text body -> HTML body -> empty string.
        beskrivelse = payload.beskrivelse or email_data.get('body_text') or email_data.get('body_html') or ''
        # case_type is normalised and capped at 50 characters before it is stored.
        template_key = (payload.case_type or 'support').strip().lower()[:50]
        priority = (payload.priority or 'normal').strip().lower()
        if priority not in {'low', 'normal', 'high', 'urgent'}:
            raise HTTPException(status_code=400, detail="priority must be one of: low, normal, high, urgent")
        # NOTE(review): the writes below are separate statements; whether they
        # share a transaction depends on the database helpers - confirm.
        case_result = execute_query(
            """
            INSERT INTO sag_sager
            (titel, beskrivelse, template_key, status, customer_id, ansvarlig_bruger_id, assigned_group_id, created_by_user_id, priority, start_date)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            RETURNING id, titel, customer_id, status, template_key, priority, start_date, created_at
            """,
            (
                titel,
                beskrivelse,
                template_key,
                'åben',
                customer_id,
                payload.ansvarlig_bruger_id,
                payload.assigned_group_id,
                payload.created_by_user_id,
                priority,
                payload.start_date,
            )
        )
        if not case_result:
            raise HTTPException(status_code=500, detail="Failed to create SAG")
        sag = case_result[0]
        sag_id = sag['id']
        # Link email to SAG (audit trail)
        execute_update(
            """
            INSERT INTO sag_emails (sag_id, email_id)
            VALUES (%s, %s)
            ON CONFLICT (sag_id, email_id) DO NOTHING
            """,
            (sag_id, email_id)
        )
        # Point the email at the new case and move it to the processed state.
        execute_update(
            """
            UPDATE email_messages
            SET linked_case_id = %s,
            status = 'processed',
            folder = 'Processed',
            processed_at = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
            WHERE id = %s
            """,
            (sag_id, email_id)
        )
        # Optional: attach the given contact to the case as its primary contact.
        if payload.contact_id:
            execute_update(
                """
                INSERT INTO sag_kontakter (sag_id, contact_id, role)
                VALUES (%s, %s, %s)
                ON CONFLICT DO NOTHING
                """,
                (sag_id, payload.contact_id, 'primary')
            )
        # A system comment is written only for these three relation types;
        # any other value links the email without a comment.
        relation_type = (payload.relation_type or 'kommentar').strip().lower()
        if relation_type in {'kommentar', 'intern_note', 'kundeopdatering'}:
            system_note = (
                f"E-mail knyttet som {relation_type}.\n"
                f"Emne: {email_data.get('subject') or '(ingen emne)'}\n"
                f"Fra: {email_data.get('sender_email') or '(ukendt)'}"
            )
            if payload.secondary_label:
                # The label is trimmed and capped at 60 characters.
                system_note += f"\nLabel: {payload.secondary_label.strip()[:60]}"
            execute_update(
                """
                INSERT INTO sag_kommentarer (sag_id, forfatter, indhold, er_system_besked)
                VALUES (%s, %s, %s, %s)
                """,
                (sag_id, 'E-mail Motor', system_note, True)
            )
        return {
            "success": True,
            "email_id": email_id,
            "sag": sag,
            "message": "SAG oprettet fra e-mail"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error("❌ Error creating SAG from email %s: %s", email_id, e)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/link-sag")
async def link_email_to_sag(email_id: int, payload: LinkEmailToSagRequest):
    """Link an email to an existing SAG and optionally append a system note.

    Returns:
        dict: ``success``, ``email_id``, ``sag_id`` and a message.

    Raises:
        HTTPException: 404 when the email or the case does not exist or is
            deleted; 500 with the underlying error text on any other failure.
    """
    try:
        email_row = execute_query(
            "SELECT id, subject, sender_email FROM email_messages WHERE id = %s AND deleted_at IS NULL",
            (email_id,)
        )
        if not email_row:
            raise HTTPException(status_code=404, detail="Email not found")
        sag_row = execute_query(
            "SELECT id, titel FROM sag_sager WHERE id = %s AND deleted_at IS NULL",
            (payload.sag_id,)
        )
        if not sag_row:
            raise HTTPException(status_code=404, detail="SAG not found")
        # Record the relation; linking the same pair twice is a no-op.
        execute_update(
            """
            INSERT INTO sag_emails (sag_id, email_id)
            VALUES (%s, %s)
            ON CONFLICT (sag_id, email_id) DO NOTHING
            """,
            (payload.sag_id, email_id)
        )
        if payload.mark_processed:
            # Link the case and move the email to the processed state.
            execute_update(
                """
                UPDATE email_messages
                SET linked_case_id = %s,
                status = 'processed',
                folder = 'Processed',
                processed_at = CURRENT_TIMESTAMP,
                updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
                """,
                (payload.sag_id, email_id)
            )
        else:
            # Link the case only; status and folder stay untouched.
            execute_update(
                """
                UPDATE email_messages
                SET linked_case_id = %s,
                updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
                """,
                (payload.sag_id, email_id)
            )
        # A comment is written only for these three relation types.
        relation_type = (payload.relation_type or 'kommentar').strip().lower()
        if relation_type in {'kommentar', 'intern_note', 'kundeopdatering'}:
            email_data = email_row[0]
            # A caller-supplied note replaces the generated text.
            note = payload.note or (
                f"E-mail knyttet som {relation_type}. "
                f"Emne: {email_data.get('subject') or '(ingen emne)'}"
            )
            # The comment is flagged as a system message in both cases.
            execute_update(
                """
                INSERT INTO sag_kommentarer (sag_id, forfatter, indhold, er_system_besked)
                VALUES (%s, %s, %s, %s)
                """,
                (payload.sag_id, payload.forfatter, note, True)
            )
        return {
            "success": True,
            "email_id": email_id,
            "sag_id": payload.sag_id,
            "message": "E-mail knyttet til SAG"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error("❌ Error linking email %s to SAG %s: %s", email_id, payload.sag_id, e)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/extract-vendor-suggestion")
async def extract_vendor_suggestion(email_id: int):
"""
Udtrækker leverandørinfo fra email body og vedhæftede PDF-fakturaer.
Bruger stærke regex-mønstre + AI for CVR, adresse, telefon, domæne.
"""
import re
import os
# ── Hjælpefunktioner ────────────────────────────────────────────────────
def clean_phone(raw: str) -> str:
    """Normalise a phone number to ``+45XXXXXXXX`` or eight bare digits.

    Input that fits neither shape is returned trimmed and capped at 20
    characters.
    """
    compact = re.sub(r'[^\d+]', '', raw)
    if len(compact) == 11 and compact.startswith('+45'):
        return compact
    if len(compact) == 10 and compact.startswith('45'):
        # Country code present but without the leading plus sign.
        return f'+{compact}'
    digits_only = re.sub(r'\D', '', raw)
    return digits_only if len(digits_only) == 8 else raw.strip()[:20]
# Kendte faktureringsplatforme der sender på vegne af leverandører
PLATFORM_DOMAINS = {
'e-conomic.com', 'e-conomic.dk', 'dinero.dk', 'billy.dk',
'uniconta.com', 'visma.com', 'simplybilling.dk', 'fakturasend.dk',
'invoicecloud.com', 'invoiced.com', 'stripe.com', 'paypal.com',
}
# Placeholder-adresser (e-conomic og lignende skabeloner)
PLACEHOLDER_ADDRESSES = {
'vejnavn 1, 1234 by', 'vejnavn 1,1234 by', 'vejnavn 1',
'1234 by', 'adresse 1', 'eksempel 1', 'gadenavn 1',
}
def is_placeholder_cvr(cvr: str) -> bool:
    """Return True when ``cvr`` cannot be a genuine CVR number.

    A value is rejected when it is not exactly eight ASCII digits, is one of
    the well-known dummy numbers, consists of a single repeated digit, or is
    a strictly ascending run such as ``23456789``.

    Malformed input (wrong length, non-digit characters, empty string) used
    to raise ValueError/IndexError in the digit arithmetic; it is now
    reported as a placeholder so callers skip the value and fall back to
    their other extraction paths.
    """
    # isascii() keeps out Unicode digit characters that int() cannot parse.
    if len(cvr) != 8 or not (cvr.isascii() and cvr.isdigit()):
        return True
    known_fakes = {
        '12345678', '87654321', '00000000', '11111111', '22222222',
        '33333333', '44444444', '55555555', '66666666', '77777777',
        '88888888', '99999999', '12341234', '11223344', '99887766',
    }
    if cvr in known_fakes:
        return True
    if len(set(cvr)) == 1:
        return True
    digits = [int(c) for c in cvr]
    # Every neighbouring pair rises by exactly one -> ascending dummy sequence.
    return all(nxt - cur == 1 for cur, nxt in zip(digits, digits[1:]))
def extract_cvr(text: str, own_cvr: str = '') -> Optional[str]:
    """Return the first plausible CVR number found in ``text``, or None.

    Patterns are tried from most to least specific; within a pattern the
    matches are taken in text order. A candidate is skipped when it equals
    ``own_cvr`` or is recognised as a placeholder.
    """
    patterns = (
        # Labelled (CVR / Moms / VAT ...)
        r'(?:CVR|Cvr\.?-?nr\.?|cvr|Moms(?:nr\.?|registrerings?nr\.?)|VAT\s*(?:no\.?|nr\.?|number))[:\s.\-–]*(?:DK)?[\s\-]?(\d{8})',
        # DK prefix
        r'\bDK[\s\-]?(\d{8})\b',
        # Standalone 8 digits (last - least specific)
        r'\b(\d{8})\b',
    )
    candidates = (
        hit.group(1)
        for pattern in patterns
        for hit in re.finditer(pattern, text, re.IGNORECASE)
    )
    return next(
        (
            value
            for value in candidates
            if value != own_cvr and value.isdigit() and not is_placeholder_cvr(value)
        ),
        None,
    )
def extract_phones(text: str) -> Optional[str]:
    """Return the first phone number found in ``text``, normalised, or None.

    Patterns are tried in order; the first pattern with a match decides the
    result.
    """
    patterns = (
        # Labelled (Tlf / Tel / Phone ...)
        r'(?:Tlf\.?|Tel\.?|Telefon|Phone|Mobil|Fax)[:\s.\-–]*(\+?[\d][\d\s\-().]{6,18})',
        # +45 XXXXXXXX
        r'(\+45[\s\-]?\d{2}[\s\-]?\d{2}[\s\-]?\d{2}[\s\-]?\d{2})',
        # 8 digits in groups: 12 34 56 78 / 1234 5678
        r'\b(\d{2}[\s\-]\d{2}[\s\-]\d{2}[\s\-]\d{2})\b',
        r'\b(\d{4}[\s\-]\d{4})\b',
    )
    hits = (re.search(pattern, text, re.IGNORECASE) for pattern in patterns)
    first_hit = next((hit for hit in hits if hit), None)
    if first_hit is None:
        return None
    return clean_phone(first_hit.group(1))
def extract_address(text: str) -> Optional[str]:
    """Return the first Danish-style postal address found in ``text``, or None.

    Three strategies are tried in order:

    1. "Street 12K[, floor/side][ -/,] 4000 City", e.g.
       "Jernbanegade 12K, st.tv - 4000 Roskilde",
       "Nørregade 5, 1. sal, 8000 Aarhus", "Industrivej 3 - 2200 København N".
    2. A street name ending in a common suffix (vej/gade/alle ...) followed
       by house number and postal code.
    3. A bare "postal code + city", returned with up to 50 characters of
       preceding context.
    """
    street_then_zip = (
        r'([A-ZÆØÅ][a-zæøåA-ZÆØÅ\-\.]{2,}\s+\d+[A-Za-z]?'
        r'(?:[,\s]+[a-zæøåA-ZÆØÅ0-9][a-zæøåA-ZÆØÅ0-9\.\s]{0,15}?)?'
        r'(?:[,\s]+|\s*[-–]\s*)\d{4}\s+[A-ZÆØÅ][a-zæøåA-ZÆØÅ]{2,})'
    )
    suffixed_street = (
        r'([A-ZÆØÅ][a-zæøåA-ZÆØÅ]+(?:vej|gade|alle|vænge|torv|plads|stræde|boulevard|have|bakke|skov|park|strand|mark|eng)'
        r'\s*\d+[A-Za-z]?(?:[,\s]+|\s*[-–]\s*)\d{4}(?:\s+[A-ZÆØÅ][a-zæøåA-ZÆØÅ]+)?)'
    )
    # Strategies 1 and 2 return the matched text as-is; only the second is
    # case-insensitive.
    for pattern, flags in ((street_then_zip, 0), (suffixed_street, re.IGNORECASE)):
        hit = re.search(pattern, text, flags)
        if hit is not None:
            return hit.group(0).strip()

    zip_city = re.search(r'(\d{4}\s+[A-ZÆØÅ][a-zæøåA-ZÆØÅ]{2,})', text)
    if zip_city is None:
        return None
    # Strategy 3: include up to 50 characters before the postal code.
    context_start = max(0, zip_city.start() - 50)
    return text[context_start:zip_city.end()].strip().lstrip('-–, ')
def is_platform_or_spam(domain: str) -> bool:
    """Return True when ``domain`` should not be used as a vendor domain.

    That is the case for the listed public mail providers, for anything in
    ``PLATFORM_DOMAINS``, and for any domain containing ``bmc``.
    """
    public_mail_providers = {'gmail.com', 'hotmail.com', 'outlook.com', 'yahoo.com', 'live.com', 'icloud.com'}
    if domain in public_mail_providers:
        return True
    if domain in PLATFORM_DOMAINS:
        return True
    return 'bmc' in domain
def extract_domain(text: str, sender_email: str = '') -> Optional[str]:
    """Return the most likely vendor domain, lower-cased, or None.

    Candidates are considered in this order: the first URL / ``www.`` host
    in the text, every email address domain in the text, and finally the
    sender's own domain. The first candidate that is not a platform or
    public mail domain wins.
    """
    candidates = []
    # Only the first URL-style host is considered.
    url_hit = re.search(r'(?:www\.|https?://)([\w\-]+\.[\w\-]+(?:\.[\w]{2,6})?)', text, re.IGNORECASE)
    if url_hit:
        candidates.append(url_hit.group(1))
    # Email addresses in the text
    candidates.extend(
        hit.group(1)
        for hit in re.finditer(r'[\w.\-+]+@([\w\-]+\.[\w\-]+(?:\.[\w]{2,6})?)', text)
    )
    # Sender email (only used if it is not a platform)
    if sender_email and '@' in sender_email:
        candidates.append(sender_email.split('@')[1])

    for candidate in candidates:
        domain = candidate.lower()
        if not is_platform_or_spam(domain):
            return domain
    return None
def extract_company_name(text: str, sender_name: str = '') -> Optional[str]:
    """Find a company name via legal suffixes, a footer pattern or text next to "CVR".

    Falls back to ``sender_name`` (or None when that is empty) if no
    strategy produces an acceptable name.
    """
    def acceptable(candidate: str, banned_words) -> bool:
        # Rejects very short names and names containing a label-like word.
        lowered = candidate.lower()
        return len(candidate) > 2 and not any(word in lowered for word in banned_words)

    # Priority 1: names carrying a legal-entity suffix
    with_suffix = re.search(
        r'\b([\w\s\-&\'\.]+(?:A/S|ApS|IVS|I/S|K/S|P/S|GmbH|Ltd\.?|LLC|AB|AS))\b',
        text
    )
    if with_suffix is not None:
        return with_suffix.group(1).strip()

    # Priority 2: e-conomic style footer: "CompanyName - Address - ..."
    # Works both with and without a preceding line break
    footer = re.search(
        r'(?:^|\n)([A-ZÆØÅ][A-Za-zæøåÆØÅ][^\n\-]{1,40}?)\s*[-–]\s*[A-ZÆØÅ][a-zæøåA-ZÆØÅ]',
        text, re.MULTILINE
    )
    if footer is not None:
        candidate = footer.group(1).strip()
        if acceptable(candidate, ('tlf', 'tel', 'mail', 'bank', 'cvr', 'mobil', 'kontonr', 'faktura')):
            return candidate

    # Priority 3: text immediately BEFORE "CVR" (typically "CompanyName CVR-nr.")
    before_cvr = re.search(
        r'([A-ZÆØÅ][A-Za-zæøåÆØÅ\s&\'\.]{2,50}?)\s*[-–,]?\s*(?:CVR|cvr)',
        text
    )
    if before_cvr is not None:
        candidate = before_cvr.group(1).strip().rstrip('-–, \t')
        if acceptable(candidate, ('tlf', 'mail', 'bank')):
            return candidate

    return sender_name or None
def parse_vendor_footer(text: str, own_cvr: str = '') -> dict:
    """
    Parser aimed at the e-conomic/Dinero footer format:
    "KONI Accounting - Jernbanegade 12K, st.tv - 4000 Roskilde - DK - CVR-nr.: 35962344"
    Splits each line on dashes and classifies the segments.

    Returns a dict that may contain ``name``, ``address``, ``cvr_number``,
    ``phone``, ``email`` and ``domain``; empty when no line qualifies.
    """
    result = {}
    # Look for lines that contain both a street/postal code AND CVR-like patterns,
    # or simply the classic "Company - Address - Postal code City" pattern
    for line in text.replace('\r', '\n').split('\n'):
        line = line.strip()
        if len(line) < 10:
            continue
        # Attempt: split on ' - ' or ' – '
        # NOTE(review): the surrounding whitespace is optional in this
        # pattern, so hyphens inside words (e.g. "CVR-nr") also split - confirm
        # that is intended.
        parts = re.split(r'\s*[-–]\s*', line)
        if len(parts) < 3:
            continue
        # Part 0 is typically the company name (no digits, no '@')
        # Part 1 is typically the address (contains digits + street name)
        # Part 2 (or the part with 4 digits) is postal code + city
        name_candidate = parts[0].strip()
        # Skip lines whose first segment is empty or has a digit in its first three characters.
        if not name_candidate or any(c.isdigit() for c in name_candidate[:3]):
            continue
        # Skip lines whose first segment looks like a label rather than a name.
        if any(w in name_candidate.lower() for w in ('tlf', 'tel', 'mail', 'bank', 'cvr', 'mobil', 'kontonr')):
            continue
        # Find the address part (contains a house number: letters + digits)
        addr_part = None
        zip_city_part = None
        for part in parts[1:]:
            part = part.strip()
            # Postal code format: 4 digits + city
            if re.match(r'^\d{4}\s+[A-ZÆØÅ]', part):
                zip_city_part = part
            elif re.search(r'\d', part) and addr_part is None:
                # Part containing digits = address (only the first one is kept)
                if not re.match(r'^DK$', part.strip(), re.IGNORECASE):
                    addr_part = part
        if name_candidate and (addr_part or zip_city_part):
            result['name'] = name_candidate
            if addr_part and zip_city_part:
                result['address'] = f"{addr_part}, {zip_city_part}"
            elif addr_part:
                result['address'] = addr_part
            elif zip_city_part:
                result['address'] = zip_city_part
        # Find CVR in this line
        cvr_m = re.search(r'CVR[^:]*:\s*(\d{8})', line, re.IGNORECASE)
        if cvr_m:
            val = cvr_m.group(1)
            if val != own_cvr and not is_placeholder_cvr(val):
                result['cvr_number'] = val
        # Find phone in this line
        phone_m = re.search(r'(?:Tlf|Tel|Mobil)[.:]?\s*(\+?[\d][\d\s\-]{6,15})', line, re.IGNORECASE)
        if phone_m:
            result['phone'] = clean_phone(phone_m.group(1))
        # Find email in this line
        email_m = re.search(r'(?:Mail|E-mail|Email)[.:]?\s*([\w.\-+]+@[\w\-]+\.[\w\-]+)', line, re.IGNORECASE)
        if email_m:
            dom = email_m.group(1).split('@')[1].lower()
            # Addresses on platform domains or domains containing 'bmc' are ignored.
            if dom not in PLATFORM_DOMAINS and 'bmc' not in dom:
                result['email'] = email_m.group(1)
                result['domain'] = dom
        if result.get('name') or result.get('cvr_number'):
            break  # The first matching line is enough
    return result
# ── Hoved-logik ─────────────────────────────────────────────────────────
try:
email_result = execute_query(
"SELECT * FROM email_messages WHERE id = %s AND deleted_at IS NULL",
(email_id,)
)
if not email_result:
raise HTTPException(status_code=404, detail="Email ikke fundet")
email = email_result[0]
from app.core.config import settings
own_cvr = getattr(settings, 'OWN_CVR', '')
# ── Hurtig genvej: brug allerede-udtrukket vendor-data fra PDF ──────
# (sat af email_analysis_service ved email-modtagelse, v2.2.18+)
pre_cvr = email.get('extracted_vendor_cvr')
pre_name = email.get('extracted_vendor_name')
if pre_cvr and pre_cvr != own_cvr and not is_placeholder_cvr(pre_cvr):
# Forsøg CVR-opslag i vendors-tabel
vendor_row = execute_query_single(
"SELECT id, name, cvr_number, phone, email, address FROM vendors WHERE cvr_number = %s",
(pre_cvr,)
)
suggestion = {
"name": (vendor_row and vendor_row.get('name')) or pre_name or None,
"cvr_number": pre_cvr,
"phone": (vendor_row and vendor_row.get('phone')) or None,
"email": (vendor_row and vendor_row.get('email')) or None,
"address": (vendor_row and vendor_row.get('address')) or None,
"domain": None,
"source": "pdf_extraction",
"vendor_id": vendor_row.get('id') if vendor_row else None,
"match_score": 100 if vendor_row else 0,
"confidence": 0.95,
}
logger.info(
f"⚡ Hurtig vendor-suggestion fra PDF-extraction for email {email_id}: "
f"CVR={pre_cvr}, vendor_id={suggestion['vendor_id']}"
)
return suggestion
def resolve_file_path(raw_path: str) -> Optional[str]:
    """Resolve a stored file path to one that exists on disk, or None.

    The path is tried as given first; otherwise it is retried relative to
    ``/app``, ``/app/app`` and the current working directory, with any
    leading slashes stripped.
    """
    import os
    if os.path.exists(raw_path):
        return raw_path
    relative = raw_path.lstrip('/')
    # Docker: CWD is /app, so try both variants plus the plain relative path
    candidates = (os.path.join(base, relative) for base in ('/app', '/app/app', ''))
    return next((path for path in candidates if os.path.exists(path)), None)
def html_to_text(html: str) -> str:
"""Fjern HTML-tags og decode entities til plain text"""
import html as html_lib
# Fjern style/script blokke
text = re.sub(r'<(style|script)[^>]*>.*?\1>', '', html, flags=re.DOTALL | re.IGNORECASE)
# Erstat
,
,