"""
Email Management Router
API endpoints for email viewing, classification, and rule management
"""
import logging
from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Request
from typing import List, Optional, Dict
from pydantic import BaseModel
from datetime import datetime, date
import unicodedata
from app.core.database import execute_query, execute_insert, execute_update, execute_query_single
from app.services.email_processor_service import EmailProcessorService
from app.services.email_workflow_service import email_workflow_service
from app.services.ollama_service import ollama_service
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router instance that every endpoint in this module registers on.
router = APIRouter()
# Relation types accepted when an email is attached to a SAG (case).
ALLOWED_SAG_EMAIL_RELATION_TYPES = {"mail"}

# Sender domains that are never used for domain -> customer suggestions or
# automatic mappings (checked by _is_ignored_sender_domain).
IGNORED_SENDER_DOMAINS = set(
    (
        "bmchub.local",
        "bmcnetworks.dk",
        "gmail.com",
        "hotmail.com",
        "icloud.com",
        "live.com",
        "outlook.com",
        "yahoo.com",
    )
)
def _normalize_domain(value: Optional[str]) -> str:
domain = str(value or "").strip().lower()
if not domain:
return ""
if domain.startswith("www."):
return domain[4:]
return domain
def _extract_sender_domain(sender_email: Optional[str]) -> str:
    """Return the normalized domain of *sender_email*, or ``""`` if it has no ``@``."""
    address = str(sender_email or "").strip().lower()
    # Everything after the first "@" is treated as the domain part.
    _local_part, at_sign, host = address.partition("@")
    if not at_sign:
        return ""
    return _normalize_domain(host)
def _is_ignored_sender_domain(domain: str) -> bool:
    """Tell whether *domain* must be skipped for customer suggestions/mappings.

    An empty domain, any domain containing the substring ``bmc``, and every
    entry of ``IGNORED_SENDER_DOMAINS`` count as ignored.
    """
    if not domain:
        return True
    if "bmc" in domain:
        return True
    return domain in IGNORED_SENDER_DOMAINS
def _upsert_domain_mapping(domain: str, customer_id: int, source: str = "manual") -> None:
    """Insert or overwrite the domain -> customer mapping row (best effort).

    Does nothing when *domain* or *customer_id* is falsy. Database errors are
    logged as warnings and never propagated to the caller.
    """
    if not (domain and customer_id):
        return

    upsert_sql = """
        INSERT INTO email_domain_customer_mappings (domain, customer_id, source)
        VALUES (%s, %s, %s)
        ON CONFLICT (domain)
        DO UPDATE SET
        customer_id = EXCLUDED.customer_id,
        source = EXCLUDED.source,
        updated_at = CURRENT_TIMESTAMP
        """
    try:
        execute_update(upsert_sql, (domain, customer_id, source))
    except Exception as exc:
        # The linking flow must keep working even if the mapping table has
        # not been migrated yet.
        logger.warning("⚠️ Could not upsert domain mapping for %s: %s", domain, exc)
def _resolve_procurement_customer_id() -> Optional[int]:
    """Pick a fallback customer id for supplier/procurement cases.

    Prefers an active customer whose name contains "bmc" (names containing
    "bmc networks" sort first), otherwise the active customer with the lowest
    id. Returns ``None`` when there is no active customer.
    """
    preferred_sql = """
        SELECT id
        FROM customers
        WHERE is_active = true
        AND LOWER(name) LIKE %s
        ORDER BY CASE WHEN LOWER(name) LIKE %s THEN 0 ELSE 1 END, id
        LIMIT 1
        """
    preferred = execute_query_single(preferred_sql, ("%bmc%", "%bmc networks%"))
    if preferred:
        return int(preferred["id"])

    first_active = execute_query_single(
        "SELECT id FROM customers WHERE is_active = true ORDER BY id LIMIT 1"
    )
    return int(first_active["id"]) if first_active else None
def _normalize_case_type(value: Optional[str]) -> str:
raw = str(value or "").strip().lower()
if not raw:
return "support"
normalized = unicodedata.normalize("NFKD", raw)
ascii_value = normalized.encode("ascii", "ignore").decode("ascii").strip().lower()
return ascii_value or raw
def _is_supplier_case_type(case_type: Optional[str]) -> bool:
    """Tell whether *case_type* denotes a supplier/procurement case.

    The value is normalized first; it matches when it equals one of the known
    keys or contains one of the fragments ``indk``, ``leverand``, ``supplier``.
    """
    normalized = _normalize_case_type(case_type)
    known_keys = {
        "indkob",
        "indkoeb",
        "supplier",
        "leverandor",
        "leverandoer",
        "vendor",
        "procurement",
        "purchase",
    }
    fragments = ("indk", "leverand", "supplier")
    return normalized in known_keys or any(fragment in normalized for fragment in fragments)
def _extract_domain_from_email(email: Optional[str]) -> str:
    """Return the normalized domain part of *email*, or ``""`` when there is no ``@``."""
    address = str(email or "").strip().lower()
    if "@" in address:
        # Text after the first "@" is the domain.
        return _normalize_domain(address.split("@", 1)[1])
    return ""
def _find_customer_for_vendor(vendor: Dict) -> Optional[int]:
    """Find an active customer that corresponds to *vendor*.

    Lookups run in order and the first hit wins: CVR number, exact e-mail,
    e-mail domain (with and without ``www.``), then exact name. Returns the
    customer id or ``None`` when nothing matches.
    """

    def _first_id(sql: str, params: tuple) -> Optional[int]:
        # Run one lookup and return the matched id, if any.
        hit = execute_query_single(sql, params)
        return int(hit["id"]) if hit else None

    cvr_number = str(vendor.get("cvr_number") or "").strip()
    if cvr_number:
        found = _first_id(
            "SELECT id FROM customers WHERE cvr_number = %s AND COALESCE(is_active, true) = true ORDER BY id LIMIT 1",
            (cvr_number,),
        )
        if found is not None:
            return found

    vendor_email = str(vendor.get("email") or "").strip().lower()
    if vendor_email:
        found = _first_id(
            "SELECT id FROM customers WHERE LOWER(TRIM(email)) = %s AND COALESCE(is_active, true) = true ORDER BY id LIMIT 1",
            (vendor_email,),
        )
        if found is not None:
            return found

    # An explicit vendor domain wins; otherwise derive it from the e-mail.
    vendor_domain = _normalize_domain(
        vendor.get("domain") or _extract_domain_from_email(vendor.get("email"))
    )
    if vendor_domain:
        found = _first_id(
            """
            SELECT id
            FROM customers
            WHERE COALESCE(is_active, true) = true
            AND (
            LOWER(TRIM(COALESCE(email_domain, ''))) = %s
            OR LOWER(TRIM(COALESCE(email_domain, ''))) = %s
            )
            ORDER BY id
            LIMIT 1
            """,
            (vendor_domain, f"www.{vendor_domain}"),
        )
        if found is not None:
            return found

    vendor_name = str(vendor.get("name") or "").strip().lower()
    if vendor_name:
        found = _first_id(
            "SELECT id FROM customers WHERE LOWER(TRIM(name)) = %s AND COALESCE(is_active, true) = true ORDER BY id LIMIT 1",
            (vendor_name,),
        )
        if found is not None:
            return found

    return None
def _ensure_customer_from_vendor(vendor_id: Optional[int]) -> Optional[int]:
    """Return a customer id for the given vendor, creating the customer if needed.

    Args:
        vendor_id: Id of an active row in ``vendors``; falsy values yield ``None``.

    Returns:
        The id of an existing matching customer, the id of a newly inserted
        customer copied from the vendor, or ``None`` when the vendor is
        missing/inactive, has no name, or creation failed and no match exists.
    """
    if not vendor_id:
        return None
    vendor = execute_query_single(
        """
        SELECT id, name, email, phone, address, cvr_number, domain, city, postal_code, country, website
        FROM vendors
        WHERE id = %s AND is_active = true
        """,
        (vendor_id,),
    )
    if not vendor:
        return None

    existing_customer_id = _find_customer_for_vendor(vendor)
    if existing_customer_id:
        return existing_customer_id

    name = str(vendor.get("name") or "").strip()
    if not name:
        return None

    domain = _normalize_domain(vendor.get("domain") or _extract_domain_from_email(vendor.get("email"))) or None
    try:
        created_id = execute_insert(
            """
            INSERT INTO customers (name, email, phone, address, cvr_number, email_domain, city, postal_code, country, website, is_active)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, true)
            RETURNING id
            """,
            (
                name,
                vendor.get("email"),
                vendor.get("phone"),
                vendor.get("address"),
                vendor.get("cvr_number"),
                domain,
                vendor.get("city"),
                vendor.get("postal_code"),
                vendor.get("country") or "DK",
                vendor.get("website"),
            ),
        )
        return int(created_id)
    except Exception as exc:
        # Possibly a race/unique conflict: record why the insert failed
        # instead of hiding it, then resolve the customer again.
        logger.warning(
            "⚠️ Could not create customer from vendor %s, retrying lookup: %s",
            vendor_id,
            exc,
        )
        return _find_customer_for_vendor(vendor)
# Pydantic Models
class EmailListItem(BaseModel):
    """One row of the email list; response model of ``GET /emails``."""

    id: int
    message_id: str
    subject: str
    sender_email: str
    sender_name: Optional[str]
    received_date: datetime
    classification: Optional[str]
    confidence_score: Optional[float]
    status: str
    is_read: bool
    has_attachments: bool
    attachment_count: int
    # The fields below come from LEFT JOINs in list_emails (email_rules,
    # vendors, customers, sag_sager), so they may be absent.
    rule_name: Optional[str] = None
    supplier_name: Optional[str] = None
    customer_name: Optional[str] = None
    linked_case_id: Optional[int] = None
    linked_case_title: Optional[str] = None
class EmailAttachment(BaseModel):
    """Attachment metadata; element type of ``EmailDetail.attachments``."""

    id: int
    email_id: int
    filename: str
    content_type: Optional[str]
    size_bytes: Optional[int]
    # Path that download_attachment serves the file from.
    file_path: Optional[str]
    created_at: datetime
class EmailDetail(BaseModel):
    """Full email record; response model of ``GET /emails/{email_id}``."""

    id: int
    message_id: str
    subject: str
    sender_email: str
    sender_name: Optional[str]
    recipient_email: Optional[str]
    cc: Optional[str]
    body_text: Optional[str]
    body_html: Optional[str]
    received_date: datetime
    folder: str
    classification: Optional[str]
    confidence_score: Optional[float]
    status: str
    is_read: bool
    has_attachments: bool
    attachment_count: int
    rule_id: Optional[int]
    supplier_id: Optional[int]
    customer_id: Optional[int]
    linked_case_id: Optional[int]
    extracted_invoice_number: Optional[str]
    extracted_amount: Optional[float]
    extracted_due_date: Optional[date]
    auto_processed: bool
    created_at: datetime
    updated_at: datetime
    # Filled by get_email from the email_attachments rows of this email.
    attachments: List[EmailAttachment] = []
    # Display names/titles joined in by get_email (customers, vendors, sag_sager).
    customer_name: Optional[str] = None
    supplier_name: Optional[str] = None
    linked_case_title: Optional[str] = None
class EmailRule(BaseModel):
    """Email rule definition.

    NOTE(review): not referenced in this part of the file; presumably used by
    rule endpoints further down — confirm.
    """

    id: Optional[int] = None
    name: str
    description: Optional[str]
    conditions: dict
    action_type: str
    action_params: Optional[dict] = {}
    priority: int = 100
    enabled: bool = True
    match_count: int = 0
    last_matched_at: Optional[datetime]
class EmailWorkflow(BaseModel):
    """Email workflow definition with trigger settings, steps and run counters.

    NOTE(review): not referenced in this part of the file; presumably used by
    workflow endpoints further down — confirm.
    """

    id: Optional[int] = None
    name: str
    description: Optional[str]
    classification_trigger: str
    sender_pattern: Optional[str] = None
    subject_pattern: Optional[str] = None
    confidence_threshold: float = 0.70
    workflow_steps: List[dict]
    priority: int = 100
    enabled: bool = True
    stop_on_match: bool = True
    execution_count: int = 0
    success_count: int = 0
    failure_count: int = 0
    last_executed_at: Optional[datetime] = None
class WorkflowExecution(BaseModel):
    """Record of one workflow run against one email.

    NOTE(review): not referenced in this part of the file — confirm usage.
    """

    id: int
    workflow_id: int
    email_id: int
    status: str
    steps_completed: int
    steps_total: Optional[int]
    result_json: Optional[List[dict]] = None  # Can be list of step results
    error_message: Optional[str]
    started_at: datetime
    completed_at: Optional[datetime]
    execution_time_ms: Optional[int]
class WorkflowAction(BaseModel):
    """Description of an available workflow action.

    NOTE(review): not referenced in this part of the file — confirm usage.
    """

    id: int
    action_code: str
    name: str
    description: Optional[str]
    category: Optional[str]
    parameter_schema: Optional[dict]
    example_config: Optional[dict]
    enabled: bool
class ProcessingStats(BaseModel):
    """Status plus counters, all counters defaulting to zero.

    NOTE(review): presumably the result of an email processing run; not
    referenced in this part of the file — confirm.
    """

    status: str
    fetched: int = 0
    saved: int = 0
    classified: int = 0
    awaiting_user_action: int = 0
    rules_matched: int = 0
    errors: int = 0
class CreateSagFromEmailRequest(BaseModel):
    """Request body of ``POST /emails/{email_id}/create-sag``."""

    # Falls back to the email subject (or a sender-based title) when omitted.
    titel: Optional[str] = None
    # Falls back to the email's text body, then its HTML body.
    beskrivelse: Optional[str] = None
    # Falls back to the customer already stored on the email.
    customer_id: Optional[int] = None
    # When set, the contact is added to the case with role 'primary'.
    contact_id: Optional[int] = None
    # Normalized and stored as the case's template_key (first 50 characters).
    case_type: str = "support"
    secondary_label: Optional[str] = None
    start_date: Optional[date] = None
    deadline: Optional[date] = None
    # Must be one of low/normal/high/urgent; 'normal' is used when omitted.
    priority: Optional[str] = None
    ansvarlig_bruger_id: Optional[int] = None
    assigned_group_id: Optional[int] = None
    created_by_user_id: int = 1
    # Must be a member of ALLOWED_SAG_EMAIL_RELATION_TYPES.
    relation_type: str = "mail"
class EmailReadStateUpdate(BaseModel):
    """Request body of ``PATCH /emails/{email_id}/read-state``."""

    # Desired read flag; True is subject to the case-assignee check.
    is_read: bool
def _can_user_mark_case_email_read(user_id: Optional[int], linked_case_id: Optional[int]) -> bool:
    """Decide whether *user_id* may mark an email as read.

    Emails without a linked case are always allowed. For case-linked emails
    the user must be the case's responsible user or a member of the case's
    assigned group; an unknown user or a missing/deleted case is denied.
    """
    if not linked_case_id:
        # Non-case emails can still be marked read by anyone.
        return True
    if not user_id:
        return False

    case = execute_query_single(
        """
        SELECT ansvarlig_bruger_id, assigned_group_id
        FROM sag_sager
        WHERE id = %s AND deleted_at IS NULL
        """,
        (linked_case_id,),
    ) or {}
    owner_id = case.get("ansvarlig_bruger_id")
    group_id = case.get("assigned_group_id")

    if owner_id is not None and int(owner_id) == int(user_id):
        return True
    if group_id is None:
        return False

    membership = execute_query_single(
        "SELECT 1 FROM user_groups WHERE user_id = %s AND group_id = %s LIMIT 1",
        (user_id, group_id),
    )
    return bool(membership)
class LinkEmailToSagRequest(BaseModel):
    """Request body of ``POST /emails/{email_id}/link-sag``."""

    # Id of the existing, non-deleted case to link to.
    sag_id: int
    # Must be a member of ALLOWED_SAG_EMAIL_RELATION_TYPES.
    relation_type: str = "mail"
    # When true the email is also set to status 'processed' / folder 'Processed'.
    mark_processed: bool = True
class RewriteEmailTextRequest(BaseModel):
    """Request body of ``POST /emails/rewrite-text``."""

    # Text to rewrite; blank text is rejected with HTTP 400.
    text: str
    # Passed to the rewrite service; "email" is used when empty.
    context: Optional[str] = "email"
class RewriteEmailTextResponse(BaseModel):
    """Response of ``POST /emails/rewrite-text``; fields are copied from the service result."""

    rewritten_text: str
    model: Optional[str] = None
    endpoint: Optional[str] = None
    context: Optional[str] = None
class DomainMappingUpsertRequest(BaseModel):
    """Request body of ``POST /emails/domain-customer-mapping``."""

    # Normalized (trimmed, lower-cased, leading "www." removed) before storing.
    domain: str
    # Must reference an active customer.
    customer_id: int
    # Origin label stored with the mapping; "manual" when empty.
    source: Optional[str] = "manual"
@router.get("/emails/sag-options")
async def get_sag_assignment_options():
    """List active users and all groups for the SAG assignment controls in the email UI.

    Raises:
        HTTPException: 500 when either query fails.
    """
    users_sql = """
        SELECT user_id AS id,
        COALESCE(full_name, username, CONCAT('Bruger #', user_id::text)) AS name
        FROM users
        WHERE is_active = true
        ORDER BY COALESCE(full_name, username, user_id::text)
        """
    groups_sql = """
        SELECT id, name
        FROM groups
        ORDER BY name
        """
    try:
        active_users = execute_query(users_sql) or []
        all_groups = execute_query(groups_sql) or []
    except Exception as exc:
        logger.error("❌ Error loading SAG assignment options: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
    return {"users": active_users, "groups": all_groups}
@router.get("/emails/search-customers")
async def search_customers(q: str = Query(..., min_length=1), limit: int = Query(20, ge=1, le=100)):
    """Autocomplete customers for the email-to-case flow.

    Matches name, e-mail, e-mail domain and CVR number by substring and ranks
    exact name first, then prefix matches on name, domain, CVR and e-mail.

    Raises:
        HTTPException: 500 when the query fails.
    """
    search_sql = """
        SELECT
        id,
        name,
        email,
        email_domain,
        cvr_number,
        CASE
        WHEN LOWER(name) = LOWER(%s) THEN 500
        WHEN LOWER(name) LIKE LOWER(%s) THEN 300
        WHEN COALESCE(email_domain, '') ILIKE %s THEN 200
        WHEN COALESCE(cvr_number, '') ILIKE %s THEN 180
        WHEN COALESCE(email, '') ILIKE %s THEN 120
        ELSE 50
        END AS rank_score
        FROM customers
        WHERE (
        name ILIKE %s
        OR COALESCE(email, '') ILIKE %s
        OR COALESCE(email_domain, '') ILIKE %s
        OR COALESCE(cvr_number, '') ILIKE %s
        )
        ORDER BY rank_score DESC, name ASC
        LIMIT %s
        """
    try:
        term = q.strip()
        starts_with = f"{term}%"
        contains = f"%{term}%"
        # Placeholder order: 1 exact + 4 prefix (ranking), 4 substring (filter), limit.
        ranking_params = (term,) + (starts_with,) * 4
        filter_params = (contains,) * 4
        matches = execute_query(search_sql, ranking_params + filter_params + (limit,))
        return matches or []
    except Exception as exc:
        logger.error("❌ Error searching customers from email router: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
def _domain_suggestion_response(email_id, domain, *, has_customer=False, ignored=False, reason=None, suggestion=None):
    """Build the response dict of the domain-customer-suggestion endpoint."""
    response = {
        "email_id": email_id,
        "domain": domain,
        "has_customer": has_customer,
        "ignored": ignored,
    }
    # "reason" is only present on ignored responses and sits before "suggestion".
    if reason is not None:
        response["reason"] = reason
    response["suggestion"] = suggestion
    return response


def _customer_suggestion(row, confidence, score, source):
    """Build the ``suggestion`` object from a customer row."""
    return {
        "customer_id": row["id"],
        "customer_name": row["name"],
        "email_domain": row.get("email_domain"),
        "cvr_number": row.get("cvr_number"),
        "confidence": confidence,
        "score": score,
        "source": source,
    }


@router.get("/emails/{email_id}/domain-customer-suggestion")
async def get_domain_customer_suggestion(email_id: int):
    """Suggest a customer for an email that has no customer linked yet.

    Sources are tried in order and the first hit is returned: contact with the
    sender's exact address (score 110), stored domain mapping (100), exact
    customer e-mail domain (95), partial domain match (70). Emails without a
    sender domain or from an ignored domain are reported as ``ignored``.

    Raises:
        HTTPException: 404 when the email does not exist, 500 on other errors.
    """
    try:
        message = execute_query_single(
            "SELECT id, sender_email, customer_id FROM email_messages WHERE id = %s AND deleted_at IS NULL",
            (email_id,),
        )
        if not message:
            raise HTTPException(status_code=404, detail="Email not found")

        if message.get("customer_id"):
            # Nothing to suggest: the email already has a customer.
            return _domain_suggestion_response(email_id, None, has_customer=True)

        sender_address = str(message.get("sender_email") or "").strip().lower()
        if sender_address:
            contact = execute_query_single(
                """
                SELECT
                c.id,
                c.name,
                c.email_domain,
                c.cvr_number,
                ct.id AS contact_id,
                ct.first_name,
                ct.last_name
                FROM contacts ct
                JOIN contact_companies cc ON cc.contact_id = ct.id
                JOIN customers c ON c.id = cc.customer_id
                WHERE c.is_active = true
                AND LOWER(TRIM(COALESCE(ct.email, ''))) = %s
                ORDER BY cc.is_primary DESC, c.id ASC
                LIMIT 1
                """,
                (sender_address,),
            )
            if contact:
                name_parts = [contact.get("first_name"), contact.get("last_name")]
                contact_name = " ".join(part for part in name_parts if part).strip()
                return _domain_suggestion_response(
                    email_id,
                    _extract_sender_domain(sender_address),
                    suggestion=_customer_suggestion(
                        contact, "high", 110, f"contact_email:{contact_name or sender_address}"
                    ),
                )

        sender_domain = _extract_sender_domain(message.get("sender_email"))
        if not sender_domain:
            return _domain_suggestion_response(email_id, None, ignored=True, reason="no_domain")
        if _is_ignored_sender_domain(sender_domain):
            return _domain_suggestion_response(
                email_id, sender_domain, ignored=True, reason="ignored_domain"
            )

        mapping_hit = execute_query_single(
            """
            SELECT c.id, c.name, c.email_domain, c.cvr_number, m.source
            FROM email_domain_customer_mappings m
            JOIN customers c ON c.id = m.customer_id
            WHERE m.domain = %s
            AND c.is_active = true
            LIMIT 1
            """,
            (sender_domain,),
        )
        if mapping_hit:
            return _domain_suggestion_response(
                email_id,
                sender_domain,
                suggestion=_customer_suggestion(
                    mapping_hit, "high", 100, f"mapping:{mapping_hit.get('source') or 'manual'}"
                ),
            )

        exact_hit = execute_query_single(
            """
            SELECT id, name, email_domain, cvr_number
            FROM customers
            WHERE is_active = true
            AND (
            LOWER(TRIM(email_domain)) = %s
            OR LOWER(TRIM(email_domain)) = %s
            )
            ORDER BY id ASC
            LIMIT 1
            """,
            (sender_domain, f"www.{sender_domain}"),
        )
        if exact_hit:
            return _domain_suggestion_response(
                email_id,
                sender_domain,
                suggestion=_customer_suggestion(exact_hit, "high", 95, "exact_domain"),
            )

        partial_hit = execute_query_single(
            """
            SELECT id, name, email_domain, cvr_number
            FROM customers
            WHERE is_active = true
            AND COALESCE(email_domain, '') ILIKE %s
            ORDER BY name ASC
            LIMIT 1
            """,
            (f"%{sender_domain}%",),
        )
        if partial_hit:
            return _domain_suggestion_response(
                email_id,
                sender_domain,
                suggestion=_customer_suggestion(partial_hit, "medium", 70, "partial_domain"),
            )

        return _domain_suggestion_response(email_id, sender_domain)
    except HTTPException:
        raise
    except Exception as exc:
        logger.error("❌ Error getting domain customer suggestion: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/emails/domain-customer-mapping")
async def upsert_domain_customer_mapping(payload: DomainMappingUpsertRequest):
    """Store a trusted sender-domain -> customer mapping.

    Raises:
        HTTPException: 400 when the domain is blank, 404 when the customer is
            missing or inactive, 500 on unexpected errors.
    """
    try:
        normalized = _normalize_domain(payload.domain)
        if not normalized:
            raise HTTPException(status_code=400, detail="domain is required")

        customer_row = execute_query_single(
            "SELECT id FROM customers WHERE id = %s AND is_active = true",
            (payload.customer_id,),
        )
        if not customer_row:
            raise HTTPException(status_code=404, detail="Customer not found")

        customer_id = int(payload.customer_id)
        mapping_source = payload.source or "manual"
        _upsert_domain_mapping(normalized, customer_id, mapping_source)
        return {
            "success": True,
            "domain": normalized,
            "customer_id": customer_id,
            "source": mapping_source,
        }
    except HTTPException:
        raise
    except Exception as exc:
        logger.error("❌ Error upserting domain mapping: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/emails/rewrite-text", response_model=RewriteEmailTextResponse)
async def rewrite_email_text(request: RewriteEmailTextRequest):
    """Rewrite email/case text through ``ollama_service.rewrite_text``.

    Raises:
        HTTPException: 400 for blank text, 502 when the service returns no
            result or an error, 500 on unexpected failures.
    """
    try:
        text = (request.text or "").strip()
        if not text:
            raise HTTPException(status_code=400, detail="text is required")

        outcome = await ollama_service.rewrite_text(text, request.context or "email")
        failed = not outcome or outcome.get("error")
        if failed:
            message = (outcome or {}).get("error") or "Could not rewrite text"
            raise HTTPException(status_code=502, detail=message)

        return RewriteEmailTextResponse(
            rewritten_text=outcome.get("rewritten_text", ""),
            model=outcome.get("model"),
            endpoint=outcome.get("endpoint"),
            context=outcome.get("context"),
        )
    except HTTPException:
        raise
    except Exception as exc:
        logger.error("❌ Error rewriting email text: %s", exc)
        raise HTTPException(status_code=500, detail="Failed to rewrite text")
@router.get("/emails/search-sager")
async def search_sager(q: str = Query(..., min_length=1), limit: int = Query(20, ge=1, le=100)):
    """Autocomplete non-deleted SAG cases for linking an email to an existing case.

    Matches title, description and the case id (as text) by substring; the
    most recently updated cases come first.

    Raises:
        HTTPException: 500 when the query fails.
    """
    search_sql = """
        SELECT s.id, s.titel, s.status, s.priority, c.name AS customer_name
        FROM sag_sager s
        LEFT JOIN customers c ON c.id = s.customer_id
        WHERE s.deleted_at IS NULL
        AND (
        s.titel ILIKE %s
        OR COALESCE(s.beskrivelse, '') ILIKE %s
        OR CAST(s.id AS TEXT) ILIKE %s
        )
        ORDER BY s.updated_at DESC
        LIMIT %s
        """
    try:
        pattern = f"%{q.strip()}%"
        cases = execute_query(search_sql, (pattern,) * 3 + (limit,))
        return cases or []
    except Exception as exc:
        logger.error("❌ Error searching sager from email router: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc))
# Email Endpoints
@router.get("/emails", response_model=List[EmailListItem])
async def list_emails(
    status: Optional[str] = Query(None),
    classification: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    limit: int = Query(50, ge=0, le=500),
    offset: int = Query(0, ge=0),
):
    """List non-deleted emails, newest first, with optional filters.

    Args:
        status: Exact match on the email status.
        classification: Exact match on the classification.
        q: Substring searched in subject, sender address and sender name.
        limit: Page size (0-500). Negative values are now rejected by request
            validation instead of surfacing as a SQL error / HTTP 500.
        offset: Number of rows to skip.

    Returns:
        A list of rows shaped like ``EmailListItem`` (empty when nothing matches).

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        where_clauses = ["em.deleted_at IS NULL"]
        params = []

        if status:
            where_clauses.append("em.status = %s")
            params.append(status)

        if classification:
            where_clauses.append("em.classification = %s")
            params.append(classification)

        if q:
            where_clauses.append("(em.subject ILIKE %s OR em.sender_email ILIKE %s OR em.sender_name ILIKE %s)")
            search_term = f"%{q}%"
            params.extend([search_term, search_term, search_term])

        # Only fixed clause text is interpolated; all values stay parameterized.
        where_sql = " AND ".join(where_clauses)
        query = f"""
            SELECT
            em.id, em.message_id, em.subject, em.sender_email, em.sender_name,
            em.received_date, em.classification, em.confidence_score, em.status,
            em.is_read, em.has_attachments, em.attachment_count,
            em.body_text, em.body_html,
            em.linked_case_id,
            er.name as rule_name,
            v.name as supplier_name,
            c.name as customer_name,
            s.titel AS linked_case_title
            FROM email_messages em
            LEFT JOIN email_rules er ON em.rule_id = er.id
            LEFT JOIN vendors v ON em.supplier_id = v.id
            LEFT JOIN customers c ON em.customer_id = c.id
            LEFT JOIN sag_sager s ON em.linked_case_id = s.id
            WHERE {where_sql}
            ORDER BY em.received_date DESC
            LIMIT %s OFFSET %s
            """
        params.extend([limit, offset])

        result = execute_query(query, tuple(params))
        # Match the sibling endpoints: a None result is returned as an empty list
        # so response-model validation cannot fail on it.
        return result or []
    except Exception as e:
        logger.error(f"❌ Error listing emails: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/emails/{email_id:int}", response_model=EmailDetail)
async def get_email(email_id: int, request: Request):
    """Return one email with its attachments and joined display names.

    As a side effect an unread email is marked read when the requesting user
    (``request.state.user_id``) is allowed to by the case-assignee check.

    Raises:
        HTTPException: 404 when the email does not exist, 500 on other errors.
    """
    detail_sql = """
        SELECT em.*,
        c.name AS customer_name,
        v.name AS supplier_name,
        s.titel AS linked_case_title
        FROM email_messages em
        LEFT JOIN customers c ON em.customer_id = c.id
        LEFT JOIN vendors v ON em.supplier_id = v.id
        LEFT JOIN sag_sager s ON em.linked_case_id = s.id
        WHERE em.id = %s AND em.deleted_at IS NULL
        """
    try:
        result = execute_query(detail_sql, (email_id,))
        logger.info(f"🔍 Query result type: {type(result)}, length: {len(result) if result else 0}")
        if not result:
            raise HTTPException(status_code=404, detail="Email not found")

        email_record = result[0]

        # Attach the attachment rows to the returned record.
        attachment_rows = execute_query(
            "SELECT * FROM email_attachments WHERE email_id = %s ORDER BY id",
            (email_id,),
        )
        email_record['attachments'] = attachment_rows or []

        current_user_id = getattr(request.state, "user_id", None)
        may_mark_read = _can_user_mark_case_email_read(
            current_user_id, email_record.get("linked_case_id")
        )
        already_read = bool(email_record.get("is_read"))
        if not already_read and may_mark_read:
            execute_update("UPDATE email_messages SET is_read = true WHERE id = %s", (email_id,))
            email_record["is_read"] = True

        return email_record
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error getting email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/emails/{email_id:int}/read-state")
async def update_email_read_state(email_id: int, payload: EmailReadStateUpdate, request: Request):
    """Set the read/unread flag of an email.

    Marking a case-linked email as read is only permitted for the case's
    responsible user or members of its assigned group; marking as unread is
    not restricted.

    Raises:
        HTTPException: 404 when the email does not exist, 403 when the user may
            not mark it read, 500 on other errors.
    """
    try:
        email_row = execute_query_single(
            "SELECT id, linked_case_id, is_read FROM email_messages WHERE id = %s AND deleted_at IS NULL",
            (email_id,),
        )
        if not email_row:
            raise HTTPException(status_code=404, detail="Email not found")

        current_user_id = getattr(request.state, "user_id", None)
        wants_read = payload.is_read
        if wants_read and not _can_user_mark_case_email_read(
            current_user_id, email_row.get("linked_case_id")
        ):
            raise HTTPException(status_code=403, detail="Email kan ikke markeres som laest: sag er ikke tildelt dig/din gruppe")

        execute_update(
            "UPDATE email_messages SET is_read = %s, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
            (payload.is_read, email_id),
        )
        return {"success": True, "email_id": email_id, "is_read": payload.is_read}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error updating read-state for email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/mark-processed")
async def mark_email_processed(email_id: int):
    """Mark an email as processed and move it to the 'Processed' folder.

    Returns:
        ``{"success", "email_id", "folder", "status"}`` taken from the updated row.

    Raises:
        HTTPException: 404 when the email does not exist (or is deleted),
            500 on other errors.
    """
    try:
        update_query = """
            UPDATE email_messages
            SET status = 'processed',
            folder = 'Processed',
            processed_at = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
            WHERE id = %s AND deleted_at IS NULL
            RETURNING id, folder, status
            """
        result = execute_query(update_query, (email_id,))

        if not result:
            raise HTTPException(status_code=404, detail="Email not found")

        # execute_query yields a list of rows (see the other endpoints, which
        # index [0]); calling .get() on that list raised AttributeError and
        # turned every successful update into a 500.
        row = result[0] if isinstance(result, (list, tuple)) else result

        logger.info(f"✅ Email {email_id} marked as processed and moved to Processed folder")

        return {
            "success": True,
            "email_id": row.get('id', email_id),
            "folder": row.get('folder', 'Processed'),
            "status": row.get('status', 'processed'),
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error marking email {email_id} as processed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/emails/{email_id}/attachments/{attachment_id}")
async def download_attachment(email_id: int, attachment_id: int):
    """Stream an attachment of a non-deleted email from disk.

    Raises:
        HTTPException: 404 when the attachment row is missing, has no stored
            path, or the path is not an existing file; 500 on other errors.
    """
    from fastapi.responses import FileResponse
    import os

    try:
        query = """
            SELECT a.* FROM email_attachments a
            JOIN email_messages e ON e.id = a.email_id
            WHERE a.id = %s AND a.email_id = %s AND e.deleted_at IS NULL
            """
        result = execute_query(query, (attachment_id, email_id))

        if not result:
            raise HTTPException(status_code=404, detail="Attachment not found")

        attachment = result[0]
        file_path = attachment['file_path']

        # A NULL path used to reach os.path.exists(None) and surface as a 500;
        # isfile additionally rejects directories, which cannot be served.
        if not file_path or not os.path.isfile(file_path):
            raise HTTPException(status_code=404, detail="File not found on disk")

        return FileResponse(
            path=file_path,
            filename=attachment['filename'],
            # `or` also covers a NULL content_type column, not just a missing key.
            media_type=attachment.get('content_type') or 'application/octet-stream',
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error downloading attachment {attachment_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.put("/emails/{email_id:int}")
async def update_email(email_id: int, status: Optional[str] = None):
    """Update an email's status.

    Raises:
        HTTPException: 400 when no status is supplied, 500 on database errors.
    """
    try:
        # ``status`` is currently the only updatable field.
        if not status:
            raise HTTPException(status_code=400, detail="No fields to update")

        execute_update(
            "UPDATE email_messages SET status = %s, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
            (status, email_id),
        )
        logger.info(f"✅ Updated email {email_id}: status={status}")
        return {"success": True, "message": "Email updated"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error updating email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/emails/{email_id}/link")
async def link_email(email_id: int, payload: Dict):
    """Link an email to a customer and/or a supplier.

    Only the ``customer_id`` and ``supplier_id`` keys of *payload* are used.
    When a truthy customer id is given, the sender's domain is also stored as
    an ``auto_link`` domain mapping unless the domain is ignored.

    Raises:
        HTTPException: 400 when neither key is present, 500 on other errors.
    """
    try:
        assignments = []
        values = []
        for column in ("customer_id", "supplier_id"):
            if column in payload:
                assignments.append(f"{column} = %s")
                values.append(payload[column])

        if not assignments:
            raise HTTPException(status_code=400, detail="Ingen felter at opdatere")

        values.append(email_id)
        update_sql = f"UPDATE email_messages SET {', '.join(assignments)}, updated_at = CURRENT_TIMESTAMP WHERE id = %s"
        execute_update(update_sql, tuple(values))

        linked_customer_id = payload.get('customer_id')
        if linked_customer_id:
            sender_row = execute_query_single(
                "SELECT sender_email FROM email_messages WHERE id = %s",
                (email_id,),
            )
            domain = _extract_sender_domain((sender_row or {}).get("sender_email"))
            if domain and not _is_ignored_sender_domain(domain):
                _upsert_domain_mapping(domain, int(linked_customer_id), "auto_link")

        logger.info(f"✅ Linked email {email_id}: {payload}")
        return {"success": True, "message": "Email linket"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Error linking email {email_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/create-sag")
async def create_sag_from_email(email_id: int, payload: CreateSagFromEmailRequest):
    """Create a new SAG from an email and persist the email-case relation.

    Flow:
        1. If the email is already linked to an existing case, return that
           case (idempotent for repeated clicks).
        2. Validate ``priority`` and ``relation_type`` before anything is written.
        3. Resolve the customer: payload, email, then (supplier case types
           only) the email's vendor or the procurement fallback customer.
        4. Insert the case, link the email, mark it processed, copy the
           attachments (best effort) and attach the optional contact.

    Raises:
        HTTPException: 404 when the email is missing; 400 for an invalid
            priority/relation_type or an unresolvable customer; 500 otherwise.
    """
    try:
        email_row = execute_query(
            "SELECT * FROM email_messages WHERE id = %s AND deleted_at IS NULL",
            (email_id,)
        )
        if not email_row:
            raise HTTPException(status_code=404, detail="Email not found")

        email_data = email_row[0]

        # Idempotent safeguard: repeated clicks should return existing linked case.
        existing_sag_id = email_data.get('linked_case_id')
        if existing_sag_id:
            existing_sag = execute_query_single(
                """
                SELECT id, titel, customer_id, status, template_key, priority, start_date, deadline, created_at
                FROM sag_sager
                WHERE id = %s AND deleted_at IS NULL
                """,
                (existing_sag_id,),
            )
            if existing_sag:
                execute_update(
                    """
                    INSERT INTO sag_emails (sag_id, email_id)
                    VALUES (%s, %s)
                    ON CONFLICT (sag_id, email_id) DO NOTHING
                    """,
                    (existing_sag_id, email_id),
                )
                return {
                    "success": True,
                    "email_id": email_id,
                    "sag": existing_sag,
                    "idempotent": True,
                    "message": "E-mail er allerede knyttet til eksisterende SAG"
                }

        # Validate request values BEFORE any write. Previously relation_type was
        # checked only after the case had been created and the email updated,
        # so a 400 response still left a persisted case behind.
        priority = (payload.priority or 'normal').strip().lower()
        if priority not in {'low', 'normal', 'high', 'urgent'}:
            raise HTTPException(status_code=400, detail="priority must be one of: low, normal, high, urgent")

        relation_type = (payload.relation_type or 'mail').strip().lower()
        if relation_type not in ALLOWED_SAG_EMAIL_RELATION_TYPES:
            raise HTTPException(status_code=400, detail="relation_type must be 'mail'")

        requested_case_type = _normalize_case_type(payload.case_type)
        is_supplier_case = _is_supplier_case_type(requested_case_type)

        customer_id = payload.customer_id or email_data.get('customer_id')
        if not customer_id and is_supplier_case:
            customer_id = _ensure_customer_from_vendor(email_data.get('supplier_id'))
        if not customer_id and is_supplier_case:
            customer_id = _resolve_procurement_customer_id()
        if not customer_id:
            raise HTTPException(status_code=400, detail="customer_id is required (missing on email and payload)")

        if not email_data.get('customer_id') and customer_id:
            # Remember the resolved customer on the email and learn the domain.
            execute_update(
                "UPDATE email_messages SET customer_id = %s, updated_at = CURRENT_TIMESTAMP WHERE id = %s",
                (customer_id, email_id),
            )
            sender_domain = _extract_sender_domain(email_data.get("sender_email"))
            if sender_domain and not _is_ignored_sender_domain(sender_domain):
                _upsert_domain_mapping(sender_domain, int(customer_id), "supplier_auto")

        titel = (payload.titel or email_data.get('subject') or f"E-mail fra {email_data.get('sender_email', 'ukendt afsender')}").strip()
        beskrivelse = payload.beskrivelse or email_data.get('body_text') or email_data.get('body_html') or ''
        template_key = requested_case_type[:50]

        case_result = execute_query(
            """
            INSERT INTO sag_sager
            (titel, beskrivelse, template_key, status, customer_id, ansvarlig_bruger_id, assigned_group_id, created_by_user_id, priority, start_date, deadline)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            RETURNING id, titel, customer_id, status, template_key, priority, start_date, deadline, created_at
            """,
            (
                titel,
                beskrivelse,
                template_key,
                'åben',
                customer_id,
                payload.ansvarlig_bruger_id,
                payload.assigned_group_id,
                payload.created_by_user_id,
                priority,
                payload.start_date,
                payload.deadline,
            )
        )
        if not case_result:
            raise HTTPException(status_code=500, detail="Failed to create SAG")

        sag = case_result[0]
        sag_id = sag['id']

        # Link email to SAG (audit trail)
        execute_update(
            """
            INSERT INTO sag_emails (sag_id, email_id)
            VALUES (%s, %s)
            ON CONFLICT (sag_id, email_id) DO NOTHING
            """,
            (sag_id, email_id)
        )
        execute_update(
            """
            UPDATE email_messages
            SET linked_case_id = %s,
            status = 'processed',
            folder = 'Processed',
            processed_at = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
            WHERE id = %s
            """,
            (sag_id, email_id)
        )

        attachments_linked = 0
        try:
            # Reuse workflow helper so attachments become real sag_files entries.
            attachments_linked = int(email_workflow_service._copy_email_attachments_to_case(email_id, sag_id, None) or 0)
            if attachments_linked > 0:
                logger.info(
                    "📎 Linked %s attachment(s) from email %s to SAG-%s during create-sag",
                    attachments_linked,
                    email_id,
                    sag_id,
                )
        except Exception as attach_exc:
            # Best effort: a failed attachment copy must not undo the created case.
            logger.warning(
                "⚠️ Could not auto-link attachments from email %s to SAG-%s: %s",
                email_id,
                sag_id,
                attach_exc,
            )

        if payload.contact_id:
            execute_update(
                """
                INSERT INTO sag_kontakter (sag_id, contact_id, role)
                VALUES (%s, %s, %s)
                ON CONFLICT DO NOTHING
                """,
                (sag_id, payload.contact_id, 'primary')
            )

        return {
            "success": True,
            "email_id": email_id,
            "sag": sag,
            "attachments_linked": attachments_linked,
            "message": "SAG oprettet fra e-mail"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error("❌ Error creating SAG from email %s: %s", email_id, e)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/link-sag")
async def link_email_to_sag(email_id: int, payload: LinkEmailToSagRequest):
    """Link an email to an existing SAG, optionally marking the email processed.

    Raises:
        HTTPException: 400 for an unsupported relation_type; 404 when the
            email or the case does not exist; 500 on other errors.
    """
    try:
        # Validate BEFORE any write. Previously this check ran after the link
        # and the status update had been persisted, so a 400 response still
        # left the email linked to the case.
        relation_type = (payload.relation_type or 'mail').strip().lower()
        if relation_type not in ALLOWED_SAG_EMAIL_RELATION_TYPES:
            raise HTTPException(status_code=400, detail="relation_type must be 'mail'")

        email_row = execute_query(
            "SELECT id, subject, sender_email FROM email_messages WHERE id = %s AND deleted_at IS NULL",
            (email_id,)
        )
        if not email_row:
            raise HTTPException(status_code=404, detail="Email not found")

        sag_row = execute_query(
            "SELECT id, titel FROM sag_sager WHERE id = %s AND deleted_at IS NULL",
            (payload.sag_id,)
        )
        if not sag_row:
            raise HTTPException(status_code=404, detail="SAG not found")

        execute_update(
            """
            INSERT INTO sag_emails (sag_id, email_id)
            VALUES (%s, %s)
            ON CONFLICT (sag_id, email_id) DO NOTHING
            """,
            (payload.sag_id, email_id)
        )

        if payload.mark_processed:
            execute_update(
                """
                UPDATE email_messages
                SET linked_case_id = %s,
                status = 'processed',
                folder = 'Processed',
                processed_at = CURRENT_TIMESTAMP,
                updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
                """,
                (payload.sag_id, email_id)
            )
        else:
            # Only record the link; status and folder stay untouched.
            execute_update(
                """
                UPDATE email_messages
                SET linked_case_id = %s,
                updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
                """,
                (payload.sag_id, email_id)
            )

        return {
            "success": True,
            "email_id": email_id,
            "sag_id": payload.sag_id,
            "message": "E-mail knyttet til SAG"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error("❌ Error linking email %s to SAG %s: %s", email_id, payload.sag_id, e)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/emails/{email_id}/extract-vendor-suggestion")
async def extract_vendor_suggestion(email_id: int):
"""
Udtrækker leverandørinfo fra email body og vedhæftede PDF-fakturaer.
Bruger stærke regex-mønstre + AI for CVR, adresse, telefon, domæne.
"""
import re
import os
# ── Hjælpefunktioner ────────────────────────────────────────────────────
def clean_phone(raw: str) -> str:
    """Normalise a phone number to ``+45XXXXXXXX`` or eight bare digits.

    Input that fits neither shape is returned stripped and cut to 20 characters.
    """
    # Keep digits and plus signs only, so "+45 12 34 56 78" becomes "+4512345678".
    compact = re.sub(r'[^\d+]', '', raw)
    already_prefixed = len(compact) == 11 and compact.startswith('+45')
    if already_prefixed:
        return compact
    if len(compact) == 10 and compact.startswith('45'):
        return f'+{compact}'

    digits_only = re.sub(r'\D', '', raw)
    return digits_only if len(digits_only) == 8 else raw.strip()[:20]
# Kendte faktureringsplatforme der sender på vegne af leverandører
PLATFORM_DOMAINS = {
'e-conomic.com', 'e-conomic.dk', 'dinero.dk', 'billy.dk',
'uniconta.com', 'visma.com', 'simplybilling.dk', 'fakturasend.dk',
'invoicecloud.com', 'invoiced.com', 'stripe.com', 'paypal.com',
}
# Placeholder-adresser (e-conomic og lignende skabeloner)
PLACEHOLDER_ADDRESSES = {
'vejnavn 1, 1234 by', 'vejnavn 1,1234 by', 'vejnavn 1',
'1234 by', 'adresse 1', 'eksempel 1', 'gadenavn 1',
}
def is_placeholder_cvr(cvr: str) -> bool:
    """Return True when *cvr* is an obvious dummy or is not a usable CVR at all.

    A value is rejected when it is not exactly eight decimal digits, is one of
    the known template numbers, repeats a single digit, or is a strictly
    ascending run such as ``23456789``.

    Args:
        cvr: Candidate CVR number.

    Returns:
        True if the value should be ignored, False if it looks like a real CVR.
    """
    known_fakes = {
        '12345678', '87654321', '00000000', '11111111', '22222222',
        '33333333', '44444444', '55555555', '66666666', '77777777',
        '88888888', '99999999', '12341234', '11223344', '99887766',
    }
    # Malformed input (wrong length, non-digits, non-string) used to raise
    # ValueError/IndexError/TypeError further down; callers only need to know
    # that the value cannot be used.
    if not isinstance(cvr, str) or len(cvr) != 8 or not cvr.isdecimal():
        return True
    if cvr in known_fakes:
        return True
    if len(set(cvr)) == 1:
        return True
    digits = [int(ch) for ch in cvr]
    # Ascending run: every digit is exactly one higher than the previous one.
    return all(nxt - cur == 1 for cur, nxt in zip(digits, digits[1:]))
def extract_cvr(text: str, own_cvr: str = '') -> Optional[str]:
    """Return the first plausible CVR number found in *text*, or None.

    Patterns are tried from most to least specific. Within each pattern every
    match is examined, skipping ``own_cvr`` and placeholder numbers.
    """
    # Number preceded by a CVR/VAT/Moms label.
    labelled = r'(?:CVR|Cvr\.?-?nr\.?|cvr|Moms(?:nr\.?|registrerings?nr\.?)|VAT\s*(?:no\.?|nr\.?|number))[:\s.\-–]*(?:DK)?[\s\-]?(\d{8})'
    # Number preceded by a DK country prefix.
    dk_prefixed = r'\bDK[\s\-]?(\d{8})\b'
    # Any standalone run of eight digits (least specific, tried last).
    standalone = r'\b(\d{8})\b'

    for regex in (labelled, dk_prefixed, standalone):
        candidates = (hit.group(1) for hit in re.finditer(regex, text, re.IGNORECASE))
        for number in candidates:
            if number == own_cvr or not number.isdigit():
                continue
            if is_placeholder_cvr(number):
                continue
            return number
    return None
def extract_phones(text: str) -> Optional[str]:
    """Return the first phone number found in *text*, normalised by ``clean_phone``.

    Patterns are tried from most to least specific; the first pattern that
    matches anywhere in the text decides the result.
    """
    phone_patterns = (
        # Number preceded by a label such as "Tlf." or "Phone"
        r'(?:Tlf\.?|Tel\.?|Telefon|Phone|Mobil|Fax)[:\s.\-–]*(\+?[\d][\d\s\-().]{6,18})',
        # +45 followed by eight digits, optionally grouped in pairs
        r'(\+45[\s\-]?\d{2}[\s\-]?\d{2}[\s\-]?\d{2}[\s\-]?\d{2})',
        # Eight digits grouped as 12 34 56 78 or 1234 5678
        r'\b(\d{2}[\s\-]\d{2}[\s\-]\d{2}[\s\-]\d{2})\b',
        r'\b(\d{4}[\s\-]\d{4})\b',
    )
    # Lazy generator: later patterns are only evaluated if earlier ones miss.
    searches = (re.search(pattern, text, re.IGNORECASE) for pattern in phone_patterns)
    first_hit = next((hit for hit in searches if hit is not None), None)
    if first_hit is None:
        return None
    return clean_phone(first_hit.group(1))
def extract_address(text: str) -> Optional[str]:
    """Return the first address-like span in *text*, or None.

    Three strategies are tried in order:
      1. "Street 12K[, floor/side][ - or ,] 4000 City", for example
         "Jernbanegade 12K, st.tv - 4000 Roskilde" or
         "Industrivej 3 - 2200 København N".
      2. A street name ending in a common suffix (vej/gade/alle ...) followed
         by a house number and a postal code, matched case-insensitively.
      3. A bare "postal code + city", returned together with up to 50
         characters of preceding context.
    """
    street_zip_city = (
        r'([A-ZÆØÅ][a-zæøåA-ZÆØÅ\-\.]{2,}\s+\d+[A-Za-z]?'
        r'(?:[,\s]+[a-zæøåA-ZÆØÅ0-9][a-zæøåA-ZÆØÅ0-9\.\s]{0,15}?)?'
        r'(?:[,\s]+|\s*[-–]\s*)\d{4}\s+[A-ZÆØÅ][a-zæøåA-ZÆØÅ]{2,})'
    )
    suffix_street = (
        r'([A-ZÆØÅ][a-zæøåA-ZÆØÅ]+(?:vej|gade|alle|vænge|torv|plads|stræde|boulevard|have|bakke|skov|park|strand|mark|eng)'
        r'\s*\d+[A-Za-z]?(?:[,\s]+|\s*[-–]\s*)\d{4}(?:\s+[A-ZÆØÅ][a-zæøåA-ZÆØÅ]+)?)'
    )
    for regex, flags in ((street_zip_city, 0), (suffix_street, re.IGNORECASE)):
        found = re.search(regex, text, flags)
        if found is not None:
            return found.group(0).strip()

    # Fallback: anchor on "4000 City" and keep some leading context.
    zip_city = re.search(r'(\d{4}\s+[A-ZÆØÅ][a-zæøåA-ZÆØÅ]{2,})', text)
    if zip_city is None:
        return None
    window_start = max(0, zip_city.start() - 50)
    return text[window_start:zip_city.end()].strip().lstrip('-–, ')
def is_platform_or_spam(domain: str) -> bool:
    """Return True if *domain* cannot identify a vendor.

    That covers any domain containing "bmc", the free-mail providers listed
    below, and the invoicing platforms in ``PLATFORM_DOMAINS``.
    """
    if 'bmc' in domain:
        return True
    free_mail = {'gmail.com', 'hotmail.com', 'outlook.com', 'yahoo.com', 'live.com', 'icloud.com'}
    if domain in free_mail:
        return True
    return domain in PLATFORM_DOMAINS
def extract_domain(text: str, sender_email: str = '') -> Optional[str]:
    """Return the most likely vendor domain, or None.

    Sources are consulted in priority order, and the first domain that is not
    a platform/free-mail domain wins:
      1. URLs in *text* ("www.x.dk", "https://x.dk", "https://www.x.dk").
      2. Email addresses in *text*.
      3. The domain of *sender_email*.

    Args:
        text: Email body / attachment text to scan.
        sender_email: Address of the sender, used only as a last resort.

    Returns:
        A lower-cased domain without a leading "www.", or None.
    """
    # The optional "www." after the scheme is consumed outside the capture
    # group. Previously "https://www.e-conomic.com" was captured as
    # "www.e-conomic.com", which bypassed the exact-match platform filter and
    # was returned with the "www." prefix.
    url_pattern = r'(?:https?://(?:www\.)?|www\.)([\w\-]+\.[\w\-]+(?:\.[\w]{2,6})?)'
    # Inspect every URL, not just the first one, so a leading platform link
    # does not hide a vendor URL further down (same approach as for emails).
    for url_hit in re.finditer(url_pattern, text, re.IGNORECASE):
        dom = url_hit.group(1).lower()
        if not is_platform_or_spam(dom):
            return dom

    # Email addresses appearing in the text
    for em in re.finditer(r'[\w.\-+]+@([\w\-]+\.[\w\-]+(?:\.[\w]{2,6})?)', text):
        dom = em.group(1).lower()
        if not is_platform_or_spam(dom):
            return dom

    # Sender address (only when it is not a platform)
    if sender_email and '@' in sender_email:
        dom = sender_email.split('@')[1].lower()
        if not is_platform_or_spam(dom):
            return dom
    return None
def extract_company_name(text: str, sender_name: str = '') -> Optional[str]:
    """Guess the company name from *text*, falling back to *sender_name*.

    Strategies, in priority order:
      1. A name ending in a legal suffix (A/S, ApS, GmbH, Ltd ...).
      2. Footer layout "CompanyName - Address - ..." at the start of a line.
      3. The text immediately before "CVR".
    Returns None when nothing matches and *sender_name* is empty.
    """
    def mentions_any(candidate, words):
        # True when any of the label-like words occurs in the candidate.
        lowered = candidate.lower()
        return any(word in lowered for word in words)

    # Priority 1: names carrying a legal-entity suffix
    with_suffix = re.search(
        r'\b([\w\s\-&\'\.]+(?:A/S|ApS|IVS|I/S|K/S|P/S|GmbH|Ltd\.?|LLC|AB|AS))\b',
        text
    )
    if with_suffix:
        return with_suffix.group(1).strip()

    # Priority 2: footer "CompanyName - Address - ..." (with or without a
    # preceding line break)
    footer = re.search(
        r'(?:^|\n)([A-ZÆØÅ][A-Za-zæøåÆØÅ][^\n\-]{1,40}?)\s*[-–]\s*[A-ZÆØÅ][a-zæøåA-ZÆØÅ]',
        text, re.MULTILINE
    )
    if footer:
        candidate = footer.group(1).strip()
        footer_labels = ('tlf', 'tel', 'mail', 'bank', 'cvr', 'mobil', 'kontonr', 'faktura')
        if len(candidate) > 2 and not mentions_any(candidate, footer_labels):
            return candidate

    # Priority 3: text directly in front of "CVR" (typically "CompanyName CVR-nr.")
    before_cvr = re.search(
        r'([A-ZÆØÅ][A-Za-zæøåÆØÅ\s&\'\.]{2,50}?)\s*[-–,]?\s*(?:CVR|cvr)',
        text
    )
    if before_cvr:
        candidate = before_cvr.group(1).strip().rstrip('-–, \t')
        if len(candidate) > 2 and not mentions_any(candidate, ('tlf', 'mail', 'bank')):
            return candidate

    return sender_name or None
def parse_vendor_footer(text: str, own_cvr: str = '') -> dict:
"""
Parse a one-line vendor footer in the e-conomic/Dinero style, e.g.:
"KONI Accounting - Jernbanegade 12K, st.tv - 4000 Roskilde - DK - CVR-nr.: 35962344"

Each line of ``text`` is split on hyphen / en-dash separators and the
segments are classified as company name, street address and "zip + city".

Args:
text: Text to scan line by line.
own_cvr: CVR number that must never be reported as the vendor's.

Returns:
A dict that may contain the keys ``name``, ``address``, ``cvr_number``,
``phone``, ``email`` and ``domain``; empty when no line yields anything.
"""
result = {}
# Look for lines following the classic "Company - Address - Zip City" layout,
# optionally carrying a CVR number as well.
for line in text.replace('\r', '\n').split('\n'):
line = line.strip()
# Lines shorter than 10 characters are ignored.
if len(line) < 10:
continue
# Split on '-' or '–'.
# NOTE(review): the surrounding whitespace is optional in this pattern, so
# hyphens inside words (e.g. "CVR-nr.") also act as separators.
parts = re.split(r'\s*[-–]\s*', line)
if len(parts) < 3:
continue
# Segment 0 is expected to be the company name (no leading digits).
# A later segment containing digits is taken as the street address.
# A segment starting with 4 digits + text is taken as zip code + city.
name_candidate = parts[0].strip()
# Reject candidates with a digit among the first three characters.
if not name_candidate or any(c.isdigit() for c in name_candidate[:3]):
continue
# Reject candidates that look like a label (phone, mail, bank, CVR ...).
if any(w in name_candidate.lower() for w in ('tlf', 'tel', 'mail', 'bank', 'cvr', 'mobil', 'kontonr')):
continue
# Classify the remaining segments.
addr_part = None
zip_city_part = None
for part in parts[1:]:
part = part.strip()
# Zip-code format: 4 digits + whitespace + capitalised city
if re.match(r'^\d{4}\s+[A-ZÆØÅ]', part):
zip_city_part = part
elif re.search(r'\d', part) and addr_part is None:
# First other segment containing a digit = street address
if not re.match(r'^DK$', part.strip(), re.IGNORECASE):
addr_part = part
if name_candidate and (addr_part or zip_city_part):
result['name'] = name_candidate
if addr_part and zip_city_part:
result['address'] = f"{addr_part}, {zip_city_part}"
elif addr_part:
result['address'] = addr_part
elif zip_city_part:
result['address'] = zip_city_part
# Look for a CVR number on this line (skipping own_cvr and placeholders)
cvr_m = re.search(r'CVR[^:]*:\s*(\d{8})', line, re.IGNORECASE)
if cvr_m:
val = cvr_m.group(1)
if val != own_cvr and not is_placeholder_cvr(val):
result['cvr_number'] = val
# Look for a labelled phone number on this line
phone_m = re.search(r'(?:Tlf|Tel|Mobil)[.:]?\s*(\+?[\d][\d\s\-]{6,15})', line, re.IGNORECASE)
if phone_m:
result['phone'] = clean_phone(phone_m.group(1))
# Look for a labelled email address on this line
email_m = re.search(r'(?:Mail|E-mail|Email)[.:]?\s*([\w.\-+]+@[\w\-]+\.[\w\-]+)', line, re.IGNORECASE)
if email_m:
dom = email_m.group(1).split('@')[1].lower()
# Addresses on invoicing platforms or "bmc" domains are not stored.
if dom not in PLATFORM_DOMAINS and 'bmc' not in dom:
result['email'] = email_m.group(1)
result['domain'] = dom
if result.get('name') or result.get('cvr_number'):
break # The first matching line is enough
return result
# ── Hoved-logik ─────────────────────────────────────────────────────────
try:
email_result = execute_query(
"SELECT * FROM email_messages WHERE id = %s AND deleted_at IS NULL",
(email_id,)
)
if not email_result:
raise HTTPException(status_code=404, detail="Email ikke fundet")
email = email_result[0]
from app.core.config import settings
own_cvr = getattr(settings, 'OWN_CVR', '')
# ── Hurtig genvej: brug allerede-udtrukket vendor-data fra PDF ──────
# (sat af email_analysis_service ved email-modtagelse, v2.2.18+)
pre_cvr = email.get('extracted_vendor_cvr')
pre_name = email.get('extracted_vendor_name')
if pre_cvr and pre_cvr != own_cvr and not is_placeholder_cvr(pre_cvr):
# Forsøg CVR-opslag i vendors-tabel
vendor_row = execute_query_single(
"SELECT id, name, cvr_number, phone, email, address FROM vendors WHERE cvr_number = %s",
(pre_cvr,)
)
suggestion = {
"name": (vendor_row and vendor_row.get('name')) or pre_name or None,
"cvr_number": pre_cvr,
"phone": (vendor_row and vendor_row.get('phone')) or None,
"email": (vendor_row and vendor_row.get('email')) or None,
"address": (vendor_row and vendor_row.get('address')) or None,
"domain": None,
"source": "pdf_extraction",
"vendor_id": vendor_row.get('id') if vendor_row else None,
"match_score": 100 if vendor_row else 0,
"confidence": 0.95,
}
logger.info(
f"⚡ Hurtig vendor-suggestion fra PDF-extraction for email {email_id}: "
f"CVR={pre_cvr}, vendor_id={suggestion['vendor_id']}"
)
return suggestion
def resolve_file_path(raw_path: str) -> Optional[str]:
    """Resolve a stored relative/absolute path to one that exists on disk.

    The path is tried as given, then relative to ``/app`` and ``/app/app``
    (Docker layouts), and finally relative to the current working directory.

    Args:
        raw_path: Path as stored with the attachment.

    Returns:
        The first existing candidate, or None when nothing exists or
        *raw_path* is empty.
    """
    import os
    # An empty path would otherwise be joined onto the base directories and
    # "resolve" to the /app directory itself.
    if not raw_path:
        return None
    if os.path.exists(raw_path):
        return raw_path
    # Strip leading slashes so os.path.join does not discard the base.
    relative = raw_path.lstrip('/')
    for base in ('/app', '/app/app', ''):
        candidate = os.path.join(base, relative)
        if os.path.exists(candidate):
            return candidate
    return None
def html_to_text(html: str) -> str:
"""Fjern HTML-tags og decode entities til plain text"""
import html as html_lib
# Fjern style/script blokke
text = re.sub(r'<(style|script)[^>]*>.*?\1>', '', html, flags=re.DOTALL | re.IGNORECASE)
# Erstat
,
,