bmc_hub/app/billing/backend/supplier_invoices.py

3445 lines
139 KiB
Python
Raw Normal View History

"""
Supplier Invoices Router - Leverandørfakturaer (Kassekladde)
Backend API for managing supplier invoices that integrate with e-conomic
"""
from fastapi import APIRouter, HTTPException, UploadFile, File, BackgroundTasks
from pydantic import BaseModel
from typing import List, Dict, Optional
from datetime import datetime, date, timedelta
from decimal import Decimal
from pathlib import Path
from app.core.database import execute_query, execute_insert, execute_update, execute_query_single
from app.core.config import settings
from app.services.economic_service import get_economic_service
from app.services.ollama_service import ollama_service
from app.services.template_service import template_service
from app.services.invoice2data_service import get_invoice2data_service
import logging
import os
import re
logger = logging.getLogger(__name__)
router = APIRouter()
def _smart_extract_lines(text: str) -> List[Dict]:
    """
    Universal invoice-line extraction over raw PDF text.

    Scans the text line by line and tries four layout patterns, most specific
    first:
      1. pdfplumber layout mode (whitespace-separated columns on one line)
      2. ALSO format (item header line, qty/prices on a following line)
      3. DCS single-line legacy PyPDF2 format (columns fused without spaces)
      4. DCS multi-line legacy format (header line, prices within 5 lines)

    Returns a list of dicts with keys: line_number, position, item_number,
    description, quantity, unit_price, total_price, raw_text. Price strings
    are normalized from Danish decimal-comma form to dot-decimal strings.

    NOTE(review): patterns 2/4 only strip the decimal comma — a thousands
    separator ('.') in the price would survive; confirm supplier formats.
    """
    lines_arr = text.split('\n')
    items = []
    i = 0
    while i < len(lines_arr):
        line = lines_arr[i].strip()
        # Skip empty or header lines
        if not line or re.search(r'(Position|Varenr|Beskrivelse|Antal|Pris|Total|Model)', line, re.IGNORECASE):
            i += 1
            continue
        # Pattern 1: pdfplumber layout mode - " 1 95006 Betalingsmetode... 1 41,20 41,20"
        # Whitespace-separated columns: position item_number description quantity unit_price total_price
        # Most specific pattern - try first!
        layout_match = re.match(r'^\s*(\d{1,2})\s+(\d{4,10})\s+(.+?)\s(\d{1,2})\s+([\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$', line)
        if layout_match:
            items.append({
                'line_number': len(items) + 1,
                'position': layout_match.group(1),
                'item_number': layout_match.group(2),
                'description': layout_match.group(3).strip(),
                'quantity': layout_match.group(4),
                # Strip thousands spaces, then Danish decimal comma -> dot.
                'unit_price': layout_match.group(5).replace(' ', '').replace(',', '.'),
                'total_price': layout_match.group(6).replace(' ', '').replace(',', '.'),
                'raw_text': line
            })
            logger.info(f"✅ pdfplumber layout: {layout_match.group(2)} - {layout_match.group(3)[:30]}...")
            i += 1
            continue
        # Pattern 2: ALSO format - "100 48023976 REFURB LENOVO..." (multi-line)
        item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line)
        if item_match:
            position = item_match.group(1)
            item_number = item_match.group(2)
            description = item_match.group(3).strip()
            # Find the next line carrying quantity + prices
            quantity = None
            unit_price = None
            total_price = None
            # Look ahead up to 9 lines for a "<qty> ST <unit> <total>" row.
            for j in range(i+1, min(i+10, len(lines_arr))):
                price_line = lines_arr[j].strip()
                price_match = re.match(r'^(\d+)\s*(?:ST|stk|pc|pcs)\s+([\d.,]+)\s+([\d.,]+)', price_line, re.IGNORECASE)
                if price_match:
                    quantity = price_match.group(1)
                    unit_price = price_match.group(2).replace(',', '.')
                    total_price = price_match.group(3).replace(',', '.')
                    break
            if quantity and unit_price:
                items.append({
                    'line_number': len(items) + 1,
                    'position': position,
                    'item_number': item_number,
                    'description': description,
                    'quantity': quantity,
                    'unit_price': unit_price,
                    'total_price': total_price,
                    'raw_text': f"{line} ... {quantity}ST {unit_price} {total_price}"
                })
                logger.info(f"✅ ALSO: {item_number} - {description[:30]}...")
            # The line was consumed as an ALSO header whether or not prices matched.
            i += 1
            continue
        # Pattern 3: DCS single-line - "195006Betalingsmetode... 141,2041,20" (legacy PyPDF2 format)
        # Position: 1 digit, Item: 4-10 digits, Description starts with letter
        # Prices: Danish format 1-3 digits, comma, 2 decimals (e.g. 41,20 or 619,00)
        # Quantity: 1-2 digits (non-greedy) before first price
        dcs_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+?)(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})$', line)
        if dcs_match:
            items.append({
                'line_number': len(items) + 1,
                'position': dcs_match.group(1),
                'item_number': dcs_match.group(2),
                'description': dcs_match.group(3).strip(),
                'quantity': dcs_match.group(4),
                'unit_price': dcs_match.group(5).replace(',', '.'),
                'total_price': dcs_match.group(6).replace(',', '.'),
                'raw_text': line
            })
            logger.info(f"✅ DCS single-line: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...")
            i += 1
            continue
        # Pattern 4: DCS multi-line - "2994922511Ubiquiti..." then search for "...USW-FLEX 1619,00619,00" (legacy)
        dcs_multi_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+)$', line)
        if dcs_multi_match and not re.search(r'KN8|EAN|Model|Position|Varenr|Tekst', line):
            position = dcs_multi_match.group(1)
            item_number = dcs_multi_match.group(2)
            description = dcs_multi_match.group(3).strip()
            # Search next 5 lines for quantity/prices (Danish format 1-3 digits before comma)
            for j in range(1, 6):
                if i + j >= len(lines_arr):
                    break
                price_line = lines_arr[i + j].strip()
                # Match: "S/N: ...USW-FLEX 1619,00619,00" - qty (1-2 digits, non-greedy) + TWO prices
                price_match = re.search(r'(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})\s*$', price_line)
                if price_match:
                    quantity = price_match.group(1)
                    unit_price = price_match.group(2).replace(',', '.')
                    total_price = price_match.group(3).replace(',', '.')
                    items.append({
                        'line_number': len(items) + 1,
                        'position': position,
                        'item_number': item_number,
                        'description': description,
                        'quantity': quantity,
                        'unit_price': unit_price,
                        'total_price': total_price,
                        'raw_text': f"{line} ... {price_line}"
                    })
                    logger.info(f"✅ DCS multi-line: {item_number} - {description[:30]}...")
                    break
            i += 1
            continue
        # No pattern matched this line; advance to the next one.
        i += 1
    if items:
        logger.info(f"📦 Multi-line extraction found {len(items)} items")
    else:
        logger.warning("⚠️ Multi-line extraction found no items")
    return items
# ========== CRUD OPERATIONS ==========
@router.get("/supplier-invoices")
async def list_supplier_invoices(
    status: Optional[str] = None,
    vendor_id: Optional[int] = None,
    overdue_only: bool = False
):
    """
    List all supplier invoices with filtering options.

    Args:
        status: Filter by status (pending, approved, sent_to_economic, paid, overdue, cancelled)
        vendor_id: Filter by vendor
        overdue_only: Only show overdue unpaid invoices

    Returns:
        List of invoice dicts; each carries a 'lines' key with its invoice lines
        and a 'computed_status' derived from paid_date/due_date.
    """
    try:
        query = """
            SELECT
                si.*,
                v.name as vendor_full_name,
                v.economic_supplier_number as vendor_economic_id,
                CASE
                    WHEN si.paid_date IS NOT NULL THEN 'paid'
                    WHEN si.due_date < CURRENT_DATE AND si.paid_date IS NULL THEN 'overdue'
                    ELSE si.status
                END as computed_status
            FROM supplier_invoices si
            LEFT JOIN vendors v ON si.vendor_id = v.id
            WHERE 1=1
        """
        params = []
        if status:
            query += " AND si.status = %s"
            params.append(status)
        if vendor_id:
            query += " AND si.vendor_id = %s"
            params.append(vendor_id)
        if overdue_only:
            query += " AND si.due_date < CURRENT_DATE AND si.paid_date IS NULL"
        query += " ORDER BY si.due_date ASC, si.invoice_date DESC"
        invoices = execute_query(query, tuple(params) if params else ())
        # Fetch all lines with ONE query instead of one query per invoice (N+1 fix).
        # psycopg2 adapts a Python list to a PostgreSQL array for ANY(%s).
        if invoices:
            invoice_ids = [invoice['id'] for invoice in invoices]
            all_lines = execute_query(
                """SELECT * FROM supplier_invoice_lines
                   WHERE supplier_invoice_id = ANY(%s)
                   ORDER BY supplier_invoice_id, line_number""",
                (invoice_ids,)
            )
            # Group lines by their parent invoice id.
            lines_by_invoice = {}
            for line in (all_lines or []):
                lines_by_invoice.setdefault(line['supplier_invoice_id'], []).append(line)
            for invoice in invoices:
                invoice['lines'] = lines_by_invoice.get(invoice['id'], [])
        return invoices
    except Exception as e:
        logger.error(f"❌ Failed to list supplier invoices: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/pending-supplier-invoice-files")
async def get_pending_files():
    """Return all files awaiting processing, including AI-extracted ones.

    The query joins each file's latest extraction (LATERAL ... LIMIT 1) and
    any matched vendor, and excludes files that already have a supplier
    invoice. Each result row is then annotated with whether an invoice2data
    template matches its vendor (by CVR keyword or vendor-name keyword).
    """
    try:
        # One row per file (DISTINCT ON + ORDER BY keeps the newest upload);
        # statuses cover both untouched and already-extracted files.
        files = execute_query(
            """SELECT DISTINCT ON (f.file_id)
                f.file_id,
                f.filename,
                f.status,
                f.uploaded_at,
                f.error_message,
                f.template_id,
                f.file_path,
                -- Quick analysis results (available immediately on upload)
                f.detected_cvr,
                f.detected_vendor_id,
                f.detected_document_type,
                f.detected_document_number,
                f.is_own_invoice,
                v_detected.name as detected_vendor_name,
                v_detected.cvr_number as detected_vendor_cvr,
                -- Get vendor info from latest extraction
                ext.vendor_name,
                ext.vendor_cvr,
                ext.vendor_matched_id,
                v.name as matched_vendor_name,
                v.cvr_number as matched_vendor_cvr_number,
                -- Check if already has invoice via latest extraction only
                si.id as existing_invoice_id,
                si.invoice_number as existing_invoice_number
            FROM incoming_files f
            LEFT JOIN vendors v_detected ON v_detected.id = f.detected_vendor_id
            LEFT JOIN LATERAL (
                SELECT extraction_id, file_id, vendor_name, vendor_cvr, vendor_matched_id
                FROM extractions
                WHERE file_id = f.file_id
                ORDER BY created_at DESC
                LIMIT 1
            ) ext ON true
            LEFT JOIN vendors v ON v.id = ext.vendor_matched_id
            LEFT JOIN supplier_invoices si ON si.extraction_id = ext.extraction_id
            WHERE f.status IN ('pending', 'processing', 'failed', 'ai_extracted', 'processed', 'duplicate')
            AND si.id IS NULL -- Only show files without invoice yet
            ORDER BY f.file_id, f.uploaded_at DESC"""
        )
        # Convert to regular dicts so we can add new keys
        files = [dict(file) for file in files] if files else []
        # Check for invoice2data templates for each file
        try:
            # NOTE: get_invoice2data_service is already imported at module level;
            # this local import is redundant but harmless.
            from app.services.invoice2data_service import get_invoice2data_service
            invoice2data = get_invoice2data_service()
            logger.info(f"📋 Checking invoice2data templates: {len(invoice2data.templates)} loaded")
            for file in files:
                # Check if there's an invoice2data template for this vendor's CVR or name.
                # Preference order: DB-matched vendor, quick-analysis detection, extraction.
                vendor_cvr = file.get('matched_vendor_cvr_number') or file.get('detected_vendor_cvr') or file.get('vendor_cvr')
                vendor_name = file.get('vendor_name') or file.get('detected_vendor_name') or file.get('matched_vendor_name')
                file['has_invoice2data_template'] = False
                logger.debug(f" File {file['file_id']}: CVR={vendor_cvr}, name={vendor_name}")
                # Check all templates
                for template_name, template_data in invoice2data.templates.items():
                    keywords = template_data.get('keywords', [])
                    logger.debug(f" Template {template_name}: keywords={keywords}")
                    # Match by CVR
                    if vendor_cvr and str(vendor_cvr) in [str(k) for k in keywords]:
                        file['has_invoice2data_template'] = True
                        file['invoice2data_template_name'] = template_name
                        logger.info(f" ✅ File {file['file_id']} matched template {template_name} by CVR")
                        break
                    # Match by vendor name (case-insensitive substring)
                    if vendor_name:
                        for keyword in keywords:
                            if str(keyword).upper() in str(vendor_name).upper():
                                file['has_invoice2data_template'] = True
                                file['invoice2data_template_name'] = template_name
                                logger.info(f" ✅ File {file['file_id']} matched template {template_name} by name")
                                break
                    # Stop scanning further templates once a match was recorded.
                    if file['has_invoice2data_template']:
                        break
        except Exception as e:
            logger.error(f"❌ Failed to check invoice2data templates: {e}", exc_info=True)
            # Continue without invoice2data info
        return {"files": files if files else [], "count": len(files) if files else 0}
    except Exception as e:
        logger.error(f"❌ Failed to get pending files: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/files")
async def get_files_by_status(status: Optional[str] = None, limit: int = 100):
    """
    Get files filtered by status(es).

    Query params:
        - status: Comma-separated list of statuses (e.g., "pending,extraction_failed")
        - limit: Maximum number of results

    Returns:
        {"count": <int>, "files": [<file rows with latest-extraction info>]}
    """
    try:
        # Parse the comma-separated status filter (empty list = no filter).
        status_list = [s.strip() for s in status.split(',')] if status else []
        # Build the optional WHERE fragment once. The SELECT body below is
        # shared, so the filtered/unfiltered variants cannot drift apart
        # (previously the whole query was duplicated).
        if status_list:
            placeholders = ','.join(['%s'] * len(status_list))
            where_clause = f"WHERE f.status IN ({placeholders})"
            filter_params = tuple(status_list)
        else:
            where_clause = ""
            filter_params = ()
        query = f"""
            SELECT f.file_id, f.filename, f.file_path, f.file_size, f.mime_type,
                   f.status, f.uploaded_at, f.processed_at, f.detected_cvr,
                   f.detected_vendor_id, v.name as detected_vendor_name,
                   ext.vendor_name,
                   ext.vendor_cvr,
                   ext.vendor_matched_id,
                   COALESCE(v_ext.name, ext.vendor_name, v.name) as best_vendor_name,
                   ext.total_amount,
                   ext.confidence as vendor_match_confidence
            FROM incoming_files f
            LEFT JOIN vendors v ON f.detected_vendor_id = v.id
            LEFT JOIN LATERAL (
                SELECT vendor_name, vendor_cvr, vendor_matched_id, total_amount, confidence
                FROM extractions
                WHERE file_id = f.file_id
                ORDER BY created_at DESC
                LIMIT 1
            ) ext ON true
            LEFT JOIN vendors v_ext ON v_ext.id = ext.vendor_matched_id
            {where_clause}
            ORDER BY f.uploaded_at DESC
            LIMIT %s
        """
        files = execute_query(query, filter_params + (limit,)) or []
        return {
            "count": len(files),
            "files": files
        }
    except Exception as e:
        logger.error(f"❌ Failed to get files: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/files/{file_id}/pdf-text")
async def get_file_pdf_text(file_id: int):
    """Return the full extracted PDF text of an uploaded file (for the template builder).

    Raises:
        HTTPException 404: file row missing, or file missing on disk.
        HTTPException 500: any other failure.
    """
    try:
        # Single-row lookup (consistent with the other file endpoints).
        file_data = execute_query_single(
            "SELECT file_path, filename FROM incoming_files WHERE file_id = %s",
            (file_id,))
        if not file_data:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")
        # Path is already imported at module level; the previous local
        # re-import was redundant.
        file_path = Path(file_data['file_path'])
        if not file_path.exists():
            raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}")
        pdf_text = await ollama_service._extract_text_from_file(file_path)
        return {
            "file_id": file_id,
            "filename": file_data['filename'],
            "pdf_text": pdf_text
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get PDF text: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/files/{file_id}/extracted-data")
async def get_file_extracted_data(file_id: int):
    """Return AI/template-extracted data for an uploaded file.

    Combines the incoming_files row, the latest extraction (including its
    parsed llm_response_json), formatted line items and a PDF text preview
    into a single response for the frontend review screen.

    Raises:
        HTTPException 404: file row not found.
        HTTPException 500: any other failure.
    """
    def _format_line(line: Dict) -> Dict:
        # Map one raw line record to the frontend schema. Falsy numeric values
        # (None, 0, '') become None — matches the original per-field behavior.
        return {
            "description": line.get('description'),
            "quantity": float(line.get('quantity')) if line.get('quantity') else None,
            "unit_price": float(line.get('unit_price')) if line.get('unit_price') else None,
            "vat_rate": float(line.get('vat_rate')) if line.get('vat_rate') else None,
            "line_total": float(line.get('line_total')) if line.get('line_total') else None,
            "vat_note": line.get('vat_note')
        }
    try:
        file_info = execute_query_single(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,))
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")
        # Latest extraction for this file, if any.
        extraction = execute_query_single(
            "SELECT * FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,))
        # Parse llm_response_json if it exists (from AI or template extraction).
        llm_json_data = None
        if extraction and extraction.get('llm_response_json'):
            import json
            try:
                raw_json = extraction['llm_response_json']
                # Always parse if it's a string, even if psycopg2 returns it as JSON type
                if isinstance(raw_json, str):
                    llm_json_data = json.loads(raw_json)
                elif isinstance(raw_json, dict):
                    llm_json_data = raw_json
                else:
                    # Fallback: try to parse the stringified value
                    llm_json_data = json.loads(str(raw_json))
                logger.info(f"📊 Parsed llm_response_json: invoice_number={llm_json_data.get('invoice_number')}")
            except Exception as e:
                logger.error(f"❌ Failed to parse llm_response_json: {e}")
        # Get extraction lines if they exist.
        extraction_lines = []
        if extraction:
            extraction_lines = execute_query(
                """SELECT * FROM extraction_lines
                   WHERE extraction_id = %s
                   ORDER BY line_number""",
                (extraction['extraction_id'],)
            )
        # Read PDF text for the preview if the file is still on disk.
        pdf_text = None
        if file_info.get('file_path'):
            file_path = Path(file_info['file_path'])
            if file_path.exists():
                pdf_text = await ollama_service._extract_text_from_file(file_path)
        # Prefer stored extraction lines; fall back to lines embedded in LLM JSON.
        if extraction_lines:
            formatted_lines = [_format_line(line) for line in extraction_lines]
        elif llm_json_data and llm_json_data.get('lines'):
            formatted_lines = [_format_line(line) for line in llm_json_data['lines']]
        else:
            formatted_lines = []
        # Build llm_data response
        llm_data = None
        if llm_json_data:
            # Normalize common invoice2data field names to our API schema.
            total_amount_value = llm_json_data.get('total_amount')
            if total_amount_value is None:
                total_amount_value = llm_json_data.get('amount_total')
            invoice_date_value = llm_json_data.get('invoice_date')
            if invoice_date_value is None:
                invoice_date_value = llm_json_data.get('document_date')
            due_date_value = llm_json_data.get('due_date')
            # Vendor name: AI uses 'vendor_name', invoice2data uses 'issuer'
            vendor_name_val = (
                llm_json_data.get('vendor_name') or
                llm_json_data.get('issuer') or
                (extraction.get('vendor_name') if extraction else None)
            )
            # Vendor CVR: AI uses 'vendor_cvr', invoice2data uses 'vendor_vat'
            vendor_cvr_val = (
                llm_json_data.get('vendor_cvr') or
                llm_json_data.get('vendor_vat') or
                (extraction.get('vendor_cvr') if extraction else None)
            )
            # Vendor address: AI uses 'vendor_address', invoice2data may have separate fields
            vendor_address_val = (
                llm_json_data.get('vendor_address') or
                llm_json_data.get('supplier_address') or
                llm_json_data.get('vendor_street')
            )
            vendor_city_val = llm_json_data.get('vendor_city') or llm_json_data.get('city')
            vendor_postal_val = llm_json_data.get('vendor_postal_code') or llm_json_data.get('postal_code')
            vendor_email_val = llm_json_data.get('vendor_email') or llm_json_data.get('supplier_email')
            # Use invoice_number from LLM JSON (works for both AI and template extraction)
            llm_data = {
                "invoice_number": llm_json_data.get('invoice_number'),
                "invoice_date": invoice_date_value,
                "due_date": due_date_value,
                "total_amount": float(total_amount_value) if total_amount_value else None,
                "currency": llm_json_data.get('currency') or 'DKK',
                "document_type": llm_json_data.get('document_type'),
                "vendor_name": vendor_name_val,
                "vendor_cvr": vendor_cvr_val,
                "vendor_address": vendor_address_val,
                "vendor_city": vendor_city_val,
                "vendor_postal_code": vendor_postal_val,
                "vendor_email": vendor_email_val,
                "lines": formatted_lines
            }
        elif extraction:
            # Fallback to extraction table columns if no LLM JSON
            llm_data = {
                "invoice_number": extraction.get('document_id'),
                "invoice_date": extraction.get('document_date').isoformat() if extraction.get('document_date') else None,
                "due_date": extraction.get('due_date').isoformat() if extraction.get('due_date') else None,
                "total_amount": float(extraction.get('total_amount')) if extraction.get('total_amount') else None,
                "currency": extraction.get('currency') or 'DKK',
                "document_type": extraction.get('document_type'),
                "vendor_name": extraction.get('vendor_name'),
                "vendor_cvr": extraction.get('vendor_cvr'),
                "vendor_address": None,
                "vendor_city": None,
                "vendor_postal_code": None,
                "vendor_email": None,
                "lines": formatted_lines
            }
        # Vendor match comes from the extraction (None when no extraction exists).
        vendor_matched_id = extraction.get('vendor_matched_id') if extraction else None
        return {
            "file_id": file_id,
            "filename": file_info['filename'],
            "status": file_info['status'],
            "uploaded_at": file_info['uploaded_at'],
            "vendor_matched_id": vendor_matched_id,
            "llm_data": llm_data,
            "extraction": extraction,
            "extraction_lines": extraction_lines if extraction_lines else [],
            "pdf_text_preview": pdf_text[:5000] if pdf_text else None
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get extracted data: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/incoming-files/{file_id}")
async def update_incoming_file(file_id: int, data: Dict):
    """
    Update metadata columns on an incoming file (e.g., detected_vendor_id).

    Only whitelisted fields are applied; unknown keys in the payload are
    silently ignored (but still echoed back in updated_fields).
    """
    try:
        # 404 when the file row does not exist.
        if not execute_query(
            "SELECT file_id FROM incoming_files WHERE file_id = %s",
            (file_id,)
        ):
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")
        # Whitelist of columns the client is allowed to set.
        allowed_fields = ['detected_vendor_id', 'status', 'notes']
        update_fields = [f"{field} = %s" for field in allowed_fields if field in data]
        update_values = [data[field] for field in allowed_fields if field in data]
        if not update_fields:
            raise HTTPException(status_code=400, detail="No valid fields to update")
        execute_update(
            f"UPDATE incoming_files SET {', '.join(update_fields)} WHERE file_id = %s",
            tuple(update_values + [file_id])
        )
        logger.info(f"✅ Updated file {file_id}: {update_fields}")
        return {
            "file_id": file_id,
            "message": "File updated successfully",
            "updated_fields": list(data.keys())
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update file: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/incoming-files/{file_id}")
async def delete_incoming_file(file_id: int):
    """
    Delete an incoming file record from the database and its file from disk.

    NOTE(review): unlike the /supplier-invoices/files/{file_id} delete
    endpoint, this one does not refuse deletion when an invoice already
    exists for the file — confirm whether that guard is intentional here.
    """
    try:
        rows = execute_query(
            "SELECT file_path FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )
        if not rows:
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")
        disk_path = Path(rows[0]['file_path'])
        # Remove the DB row first, then clean up the on-disk file.
        execute_update(
            "DELETE FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )
        if disk_path.exists():
            disk_path.unlink()
            logger.info(f"🗑️ Deleted file from disk: {disk_path}")
        logger.info(f"✅ Deleted file {file_id}")
        return {
            "file_id": file_id,
            "message": "File deleted successfully"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete file: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/files/{file_id}/download")
async def download_pending_file(file_id: int):
    """Serve the stored PDF inline so the browser renders it instead of downloading.

    Raises:
        HTTPException 404: file row missing, or file missing on disk.
    """
    from fastapi.responses import FileResponse
    try:
        # Get file info
        file_result = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,))
        if not file_result:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")
        file_info = file_result[0]  # Get first row
        file_path = Path(file_info['file_path'])
        if not file_path.exists():
            raise HTTPException(status_code=404, detail="Fil findes ikke på disk")
        # Quote the filename: an unquoted value containing spaces or ';'
        # produces an invalid Content-Disposition header (RFC 6266 requires
        # a quoted-string). Embedded double quotes are stripped defensively.
        safe_filename = str(file_info['filename']).replace('"', '')
        # Inline disposition so the browser displays the PDF instead of downloading.
        return FileResponse(
            path=str(file_path),
            media_type='application/pdf',
            headers={"Content-Disposition": f'inline; filename="{safe_filename}"'}
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to view file: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/files/{file_id}/pdf")
async def get_file_pdf(file_id: int):
    """Get PDF file for viewing (alias for download endpoint).

    Delegates directly to download_pending_file, so the response (inline PDF
    FileResponse) and its error handling are identical to the /download route.
    """
    return await download_pending_file(file_id)
@router.post("/supplier-invoices/files/{file_id}/link-vendor")
async def link_vendor_to_extraction(file_id: int, data: dict):
    """Link an existing vendor to the file's latest extraction.

    Creates a minimal placeholder extraction when none exists yet, then
    records the vendor match on both the extraction and the incoming_files
    row so the file list reflects the vendor immediately.

    NOTE(review): a second handler for this same method+path (and the same
    function name) is defined later in this module; FastAPI serves the first
    registered route, so THIS definition is the live one and the later
    duplicate is dead code. Consider removing one of the two.
    """
    try:
        vendor_id = data.get('vendor_id')
        if not vendor_id:
            raise HTTPException(status_code=400, detail="vendor_id is required")
        # Verify vendor exists
        vendor = execute_query_single(
            "SELECT id, name FROM vendors WHERE id = %s",
            (vendor_id,))
        if not vendor:
            raise HTTPException(status_code=404, detail="Leverandør ikke fundet")
        # Get latest extraction for this file
        extraction = execute_query_single(
            "SELECT extraction_id FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,))
        if not extraction:
            # No extraction exists (e.g. custom template match or not yet processed)
            # Create a minimal placeholder extraction so vendor can be linked
            logger.info(f"⚠️ No extraction for file {file_id} — creating minimal extraction for vendor link")
            extraction_id = execute_insert(
                """INSERT INTO extractions
                (file_id, vendor_matched_id, vendor_name, vendor_cvr,
                document_id, document_type, document_type_detected,
                currency, confidence, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                RETURNING extraction_id""",
                (file_id, vendor_id,
                 vendor['name'], None,
                 None, 'invoice', 'invoice',
                 'DKK', 1.0, 'manual')
            )
        else:
            extraction_id = extraction['extraction_id']
        # Update extraction with vendor match (no-op for the freshly inserted
        # placeholder, which already carries vendor_matched_id).
        execute_update(
            "UPDATE extractions SET vendor_matched_id = %s WHERE extraction_id = %s",
            (vendor_id, extraction_id)
        )
        # Also update incoming_files so table shows vendor immediately
        execute_update(
            "UPDATE incoming_files SET detected_vendor_id = %s, status = 'processed' WHERE file_id = %s",
            (vendor_id, file_id)
        )
        logger.info(f"✅ Linked vendor {vendor['name']} (ID: {vendor_id}) to file {file_id}")
        return {
            "status": "success",
            "vendor_id": vendor_id,
            "vendor_name": vendor['name']
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to link vendor: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/supplier-invoices/files/{file_id}")
async def delete_pending_file_endpoint(file_id: int):
    """Delete an uploaded file and its related data.

    Refuses (HTTP 400) when a supplier invoice was already created from the
    file. Deletes the DB row (extractions are removed by cascade) and then
    removes the file from disk.
    """
    try:
        # Get file info
        file_info = execute_query_single(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,))
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")
        # Guard: never delete a file that an invoice was already created from.
        invoice_exists = execute_query_single(
            """SELECT si.id FROM supplier_invoices si
               JOIN extractions e ON si.extraction_id = e.extraction_id
               WHERE e.file_id = %s""",
            (file_id,))
        if invoice_exists:
            raise HTTPException(
                status_code=400,
                detail="Kan ikke slette fil - der er allerede oprettet en faktura fra denne fil"
            )
        # Delete from database (cascade will handle extractions)
        execute_update(
            "DELETE FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )
        # Delete the physical file via pathlib (os and Path are already
        # imported at module level; the previous local imports were redundant).
        if file_info.get('file_path'):
            file_path = Path(file_info['file_path'])
            if file_path.exists():
                file_path.unlink()
                logger.info(f"🗑️ Deleted file: {file_path}")
        return {"message": "Fil slettet", "file_id": file_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete file: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.patch("/supplier-invoices/files/{file_id}")
async def update_file_status(file_id: int, data: dict):
    """Update the status of an uploaded file and stamp processed_at."""
    allowed_statuses = ['pending', 'processing', 'processed', 'ai_extracted', 'completed', 'failed']
    try:
        new_status = data.get('status')
        # A missing payload value (None) and an unknown value both fall
        # outside the whitelist, so one membership check covers both.
        if new_status not in allowed_statuses:
            raise HTTPException(status_code=400, detail=f"Ugyldig status. Tilladte: {', '.join(allowed_statuses)}")
        execute_update(
            "UPDATE incoming_files SET status = %s, processed_at = CURRENT_TIMESTAMP WHERE file_id = %s",
            (new_status, file_id)
        )
        logger.info(f"✅ Updated file {file_id} status to {new_status}")
        return {"status": "success", "file_id": file_id, "new_status": new_status}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update file status: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/files/{file_id}/link-vendor")
async def link_vendor_to_extraction(file_id: int, data: dict):
    """Link an existing vendor to an extraction.

    NOTE(review): this is a DUPLICATE of the handler registered earlier in
    this module for the same method+path (it also rebinds the same function
    name). FastAPI serves the first registered route, so this definition is
    dead code — it differs from the live one only in message wording and in
    returning extraction_id. Consider removing one of the two.
    """
    try:
        vendor_id = data.get('vendor_id')
        if not vendor_id:
            raise HTTPException(status_code=400, detail="vendor_id er påkrævet")
        # Verify vendor exists
        vendor = execute_query_single(
            "SELECT id, name FROM vendors WHERE id = %s",
            (vendor_id,))
        if not vendor:
            raise HTTPException(status_code=404, detail=f"Leverandør {vendor_id} ikke fundet")
        # Get latest extraction for this file
        extraction = execute_query_single(
            "SELECT extraction_id FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,))
        if not extraction:
            # Create minimal extraction if none exists
            logger.info(f"⚠️ No extraction for file {file_id} — creating minimal extraction for vendor link")
            extraction_id = execute_insert(
                """INSERT INTO extractions
                (file_id, vendor_matched_id, vendor_name, vendor_cvr,
                document_id, document_type, document_type_detected,
                currency, confidence, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                RETURNING extraction_id""",
                (file_id, vendor_id, vendor['name'], None,
                 None, 'invoice', 'invoice', 'DKK', 1.0, 'manual')
            )
        else:
            extraction_id = extraction['extraction_id']
        # Record the vendor match on the extraction (no-op for the freshly
        # inserted placeholder, which already carries vendor_matched_id).
        execute_update(
            "UPDATE extractions SET vendor_matched_id = %s WHERE extraction_id = %s",
            (vendor_id, extraction_id)
        )
        # Mirror the vendor onto incoming_files so the list view updates immediately.
        execute_update(
            "UPDATE incoming_files SET detected_vendor_id = %s, status = 'processed' WHERE file_id = %s",
            (vendor_id, file_id)
        )
        logger.info(f"✅ Linked vendor {vendor['name']} (ID: {vendor_id}) to extraction {extraction_id}")
        return {
            "status": "success",
            "vendor_id": vendor_id,
            "vendor_name": vendor['name'],
            "extraction_id": extraction_id
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to link vendor: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/from-extraction/{file_id}")
async def create_invoice_from_extraction(file_id: int):
"""Opret leverandørfaktura fra extraction data"""
try:
# Get latest extraction for this file
extraction = execute_query_single(
"""SELECT e.*, v.name as vendor_name
FROM extractions e
LEFT JOIN vendors v ON v.id = e.vendor_matched_id
WHERE e.file_id = %s
ORDER BY e.created_at DESC
LIMIT 1""",
(file_id,))
if not extraction:
raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")
extraction_data = extraction[0]
# Check if vendor is matched
if not extraction_data['vendor_matched_id']:
raise HTTPException(
status_code=400,
detail="Leverandør skal linkes før faktura kan oprettes. Brug 'Link eller Opret Leverandør' først."
)
# Check if invoice already exists
existing = execute_query_single(
"SELECT id FROM supplier_invoices WHERE extraction_id = %s",
(extraction_data['extraction_id'],))
if existing:
raise HTTPException(status_code=400, detail="Faktura er allerede oprettet fra denne extraction")
# Get extraction lines
lines = execute_query_single(
"""SELECT * FROM extraction_lines
WHERE extraction_id = %s
ORDER BY line_number""",
(extraction_data['extraction_id'],)
)
# Parse LLM response JSON if it's a string
import json
llm_data = extraction_data.get('llm_response_json')
if isinstance(llm_data, str):
try:
llm_data = json.loads(llm_data)
except:
llm_data = {}
elif not llm_data:
llm_data = {}
# Get invoice number and type from LLM data or generate one
invoice_number = llm_data.get('invoice_number') if llm_data else None
if not invoice_number:
invoice_number = f"INV-{file_id}"
# Detect document type (invoice or credit_note)
document_type = llm_data.get('document_type', 'invoice') if llm_data else 'invoice'
invoice_type = 'credit_note' if document_type == 'credit_note' else 'invoice'
# Get dates - use today as fallback if missing
from datetime import datetime, timedelta
invoice_date = extraction_data.get('document_date')
if not invoice_date:
invoice_date = datetime.now().strftime('%Y-%m-%d')
logger.warning(f"⚠️ No invoice_date found, using today: {invoice_date}")
due_date = extraction_data.get('due_date')
if not due_date:
# Default to 30 days from invoice date
inv_date_obj = datetime.strptime(invoice_date, '%Y-%m-%d')
due_date = (inv_date_obj + timedelta(days=30)).strftime('%Y-%m-%d')
logger.warning(f"⚠️ No due_date found, using invoice_date + 30 days: {due_date}")
# Create supplier invoice
invoice_id = execute_insert(
"""INSERT INTO supplier_invoices (
vendor_id, invoice_number, invoice_date, due_date,
total_amount, currency, status, extraction_id, notes, invoice_type
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
RETURNING id""",
(
extraction_data['vendor_matched_id'],
invoice_number,
invoice_date,
due_date,
extraction_data['total_amount'],
extraction_data['currency'],
'credited' if invoice_type == 'credit_note' else 'unpaid',
extraction_data['extraction_id'],
f"Oprettet fra AI extraction (file_id: {file_id})",
invoice_type
)
)
# Create invoice lines
if lines:
for line in lines:
execute_update(
"""INSERT INTO supplier_invoice_lines (
supplier_invoice_id, description, quantity, unit_price,
line_total, vat_rate, vat_amount
) VALUES (%s, %s, %s, %s, %s, %s, %s)""",
(
invoice_id,
line['description'],
line.get('quantity') or 1,
line.get('unit_price') or 0,
line.get('line_total') or 0,
line.get('vat_rate') or 25.00, # Default 25% Danish VAT if NULL
line.get('vat_amount')
)
)
# Update file status
execute_update(
"UPDATE incoming_files SET status = 'completed' WHERE file_id = %s",
(file_id,)
)
logger.info(f"✅ Created supplier invoice {invoice_id} from extraction {extraction['extraction_id']}")
return {
"status": "success",
"invoice_id": invoice_id,
"invoice_number": invoice_number,
"vendor_name": extraction['vendor_name'],
"total_amount": extraction['total_amount'],
"currency": extraction['currency']
}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to create invoice from extraction: {e}")
raise HTTPException(status_code=500, detail=str(e))
# Keep existing endpoints below...
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to delete file: {e}")
raise HTTPException(status_code=500, detail=str(e))
# ========== TEMPLATE MANAGEMENT (must be before {invoice_id} route) ==========
@router.get("/supplier-invoices/templates")
async def list_templates():
"""Hent alle templates (både database og invoice2data YAML)"""
try:
# Get database templates
query = """
SELECT t.*, v.name as vendor_name
FROM supplier_invoice_templates t
LEFT JOIN vendors v ON t.vendor_id = v.id
WHERE t.is_active = true
ORDER BY t.created_at DESC
"""
db_templates = execute_query(query) or []
# Get invoice2data templates
invoice2data_service = get_invoice2data_service()
invoice2data_templates = []
for template_name, template_data in invoice2data_service.templates.items():
# Extract vendor CVR from keywords
vendor_cvr = None
keywords = template_data.get('keywords', [])
for keyword in keywords:
if isinstance(keyword, str) and keyword.isdigit() and len(keyword) == 8:
vendor_cvr = keyword
break
# Get vendor info from database if CVR found
vendor_name = template_data.get('issuer', 'Ukendt')
vendor_id = None
if vendor_cvr:
vendor = execute_query(
"SELECT id, name FROM vendors WHERE cvr_number = %s",
(vendor_cvr,))
if vendor and len(vendor) > 0:
vendor_data = vendor[0]
vendor_id = vendor_data['id']
vendor_name = vendor_data['name']
invoice2data_templates.append({
'template_id': -1, # Negative ID to distinguish from DB templates
'template_name': f"Invoice2Data: {template_name}",
'template_type': 'invoice2data',
'yaml_filename': template_name,
'vendor_id': vendor_id,
'vendor_name': vendor_name,
'vendor_cvr': vendor_cvr,
'default_product_category': template_data.get('default_product_category', 'varesalg'),
'default_product_group_number': template_data.get('default_product_group_number', 1),
'usage_count': 0, # Could track this separately
'is_active': True,
'detection_patterns': keywords,
'field_mappings': template_data.get('fields', {}),
'created_at': None
})
# Combine both types
all_templates = db_templates + invoice2data_templates
return all_templates
except Exception as e:
logger.error(f"❌ Failed to list templates: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/templates/{template_id}")
async def get_template(template_id: int):
"""Hent et specifikt template med vendor info"""
try:
query = """
SELECT t.*, v.name as vendor_name, v.cvr_number as vendor_cvr
FROM supplier_invoice_templates t
LEFT JOIN vendors v ON t.vendor_id = v.id
WHERE t.template_id = %s AND t.is_active = true
"""
template = execute_query_single(query, (template_id,))
if not template:
raise HTTPException(status_code=404, detail="Template not found")
return template
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to get template {template_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/search-vendor")
async def search_vendor_by_info(request: Dict):
"""
Søg efter vendor baseret navn, CVR, eller opret ny
Request body:
{
"vendor_name": "DCS ApS",
"vendor_cvr": "12345678",
"vendor_address": "Vej 1, 2000 By",
"create_if_missing": true
}
"""
try:
vendor_name = request.get('vendor_name')
vendor_cvr = request.get('vendor_cvr')
vendor_address = request.get('vendor_address')
create_if_missing = request.get('create_if_missing', False)
# Search by CVR first (most accurate)
if vendor_cvr:
vendor = execute_query_single(
"SELECT id, name, cvr_number FROM vendors WHERE cvr_number = %s",
(vendor_cvr,))
if vendor:
return {
"found": True,
"vendor_id": vendor['id'],
"vendor_name": vendor['name'],
"source": "cvr_match"
}
# Search by name (fuzzy)
if vendor_name:
vendors = execute_query_single(
"SELECT id, name, cvr_number FROM vendors WHERE LOWER(name) LIKE LOWER(%s) LIMIT 5",
(f"%{vendor_name}%",)
)
if vendors:
return {
"found": True,
"matches": vendors,
"source": "name_search",
"message": "Flere mulige matches - vælg en eller opret ny"
}
# Create new vendor if requested
if create_if_missing and vendor_name:
from app.core.config import settings
# Validate not creating vendor with own CVR
if vendor_cvr and settings.OWN_CVR in vendor_cvr:
raise HTTPException(
status_code=400,
detail=f"Kan ikke oprette vendor med eget CVR ({settings.OWN_CVR})"
)
new_vendor_id = execute_insert(
"""INSERT INTO vendors (name, cvr_number, address, created_at)
VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""",
(vendor_name, vendor_cvr, vendor_address)
)
logger.info(f"✅ Created new vendor: {vendor_name} (ID: {new_vendor_id})")
return {
"found": False,
"created": True,
"vendor_id": new_vendor_id,
"vendor_name": vendor_name,
"source": "newly_created"
}
return {
"found": False,
"message": "Ingen vendor fundet - angiv create_if_missing=true for at oprette"
}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Vendor search failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/ai/analyze")
async def ai_analyze_invoice(request: Dict):
"""Brug AI til at analysere faktura og foreslå template felter"""
try:
pdf_text = request.get('pdf_text', '')
vendor_id = request.get('vendor_id')
if not pdf_text:
raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet")
# Build enhanced PDF text with instruction
from app.core.config import settings
enhanced_text = f"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering.
RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON!
REQUIRED STRUKTUR (alle felter skal med):
{{
"invoice_number": "5082481",
"invoice_date": "24/10-25",
"total_amount": "1471.20",
"cvr": "29522790",
"detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"],
"lines_start": "Nr.VarenrTekst",
"lines_end": "Subtotal"
}}
FIND FØLGENDE:
1. invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice")
2. invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY)
3. total_amount: Total beløb
- Søg efter "Total", "I alt", "Totalbeløb"
- Hvis beløbet er næste linje, match sidste tal
- Format: [\d.,]+ (f.eks. 1.471,20 eller 1471.20)
4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK")
- IGNORER CVR {settings.OWN_CVR} - dette er KØBERS CVR, ikke leverandør!
- Find LEVERANDØRENS CVR (normalt i toppen/header)
5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren
- Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S")
- Website eller email (f.eks. "WWW.DCS.DK")
- Adresse element (f.eks. "Høgemosevænget", "Mårkærvej")
- UNDGÅ generiske ord som "Faktura", "Danmark", "Side"
6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr")
7. lines_end: Tekst EFTER varelinjer (f.eks. "Subtotal", "I alt", "Side 1 af")
VIGTIGT:
- detection_patterns SKAL være mindst 3 specifikke tekststrenge
- Vælg tekststrenge der er UNIKKE for denne leverandør
- CVR SKAL være leverandørens - IKKE {settings.OWN_CVR} (det er køber)
- LAD VÆRE med at lave patterns eller line_item regex - kun udtræk data
PDF TEKST:
{pdf_text[:2000]}
RETURNER KUN JSON - intet andet!"""
# Call Ollama
logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst")
result = await ollama_service.extract_from_text(enhanced_text)
if not result:
raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen")
logger.info(f"✅ AI analyse gennemført: {result}")
return result
except Exception as e:
logger.error(f"❌ AI analyse fejlede: {e}")
raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}")
@router.post("/supplier-invoices/templates")
async def create_template(request: Dict):
"""
Opret ny template
Request body:
{
"vendor_id": 1,
"template_name": "Test Template",
"detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}],
"field_mappings": {"invoice_number": {"pattern": r"Nummer\s*(\d+)", "group": 1}}
}
"""
try:
import json
from app.core.config import settings
vendor_id = request.get('vendor_id')
template_name = request.get('template_name')
detection_patterns = request.get('detection_patterns', [])
field_mappings = request.get('field_mappings', {})
default_product_category = request.get('default_product_category', 'varesalg')
if not vendor_id or not template_name:
raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet")
# Validate that vendor CVR is not own company
vendor_cvr_mapping = field_mappings.get('vendor_cvr', {})
if vendor_cvr_mapping:
# Extract CVR value from pattern or value field
cvr_value = vendor_cvr_mapping.get('value') or vendor_cvr_mapping.get('pattern', '')
if settings.OWN_CVR in str(cvr_value):
raise HTTPException(
status_code=400,
detail=f"CVR {cvr_value} matcher egen virksomhed ({settings.OWN_CVR}). Brug leverandørens CVR, ikke købers!"
)
# Insert template and get template_id
query = """
INSERT INTO supplier_invoice_templates
(vendor_id, template_name, detection_patterns, field_mappings, default_product_category)
VALUES (%s, %s, %s, %s, %s)
RETURNING template_id
"""
result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings), default_product_category))
template_id = result[0]['template_id'] if result else None
if not template_id:
raise HTTPException(status_code=500, detail="Kunne ikke oprette template")
# Reload templates in cache
template_service.reload_templates()
logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}")
return {"template_id": template_id, "message": "Template oprettet"}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to create template: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/{invoice_id}")
async def get_supplier_invoice(invoice_id: int):
"""Get single supplier invoice with lines"""
try:
invoice_result = execute_query(
"""SELECT si.*, v.name as vendor_full_name, v.economic_supplier_number as vendor_economic_id
FROM supplier_invoices si
LEFT JOIN vendors v ON si.vendor_id = v.id
WHERE si.id = %s""",
(invoice_id,))
if not invoice_result:
raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")
invoice = invoice_result[0]
# Get lines
lines = execute_query_single(
"SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
(invoice_id,)
)
invoice['lines'] = lines
return invoice
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to get supplier invoice {invoice_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices")
async def create_supplier_invoice(data: Dict):
"""
Create new supplier invoice
Required fields:
- invoice_number: str
- vendor_id: int
- invoice_date: str (YYYY-MM-DD)
- total_amount: float
Optional fields:
- due_date: str (YYYY-MM-DD) - defaults to invoice_date + 30 days
- vat_amount: float
- net_amount: float
- currency: str (default 'DKK')
- description: str
- notes: str
- lines: List[Dict] with line items
"""
try:
# Validate required fields
required = ['invoice_number', 'vendor_id', 'invoice_date', 'total_amount']
missing = [f for f in required if f not in data]
if missing:
raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}")
# Calculate due_date if not provided (30 days default)
invoice_date = datetime.fromisoformat(data['invoice_date'])
due_date = data.get('due_date')
if not due_date:
due_date = (invoice_date + timedelta(days=30)).strftime('%Y-%m-%d')
# Determine invoice type (default to invoice)
invoice_type = data.get('invoice_type', 'invoice')
if invoice_type not in ['invoice', 'credit_note']:
invoice_type = 'invoice'
# Insert supplier invoice
invoice_id = execute_insert(
"""INSERT INTO supplier_invoices
(invoice_number, vendor_id, vendor_name, invoice_date, due_date,
total_amount, vat_amount, net_amount, currency, description, notes,
status, created_by, invoice_type)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
(
data['invoice_number'],
data['vendor_id'],
data.get('vendor_name'),
data['invoice_date'],
due_date,
data['total_amount'],
data.get('vat_amount', 0),
data.get('net_amount', data['total_amount']),
data.get('currency', 'DKK'),
data.get('description'),
data.get('notes'),
'credited' if invoice_type == 'credit_note' else 'pending',
data.get('created_by'),
invoice_type
)
)
# Insert lines if provided
if data.get('lines'):
for idx, line in enumerate(data['lines'], start=1):
# Map vat_code: I52 for reverse charge, I25 for standard
vat_code = line.get('vat_code', 'I25')
execute_insert(
"""INSERT INTO supplier_invoice_lines
(supplier_invoice_id, line_number, description, quantity, unit_price,
line_total, vat_code, vat_rate, vat_amount, contra_account, sku)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
(
invoice_id,
line.get('line_number', idx),
line.get('description'),
line.get('quantity', 1),
line.get('unit_price', 0),
line.get('line_total', 0),
vat_code,
line.get('vat_rate', 25.00),
line.get('vat_amount', 0),
line.get('contra_account', '5810'),
line.get('sku')
)
)
logger.info(f"✅ Created supplier invoice: {data['invoice_number']} (ID: {invoice_id})")
return {
"success": True,
"invoice_id": invoice_id,
"invoice_number": data['invoice_number'],
"due_date": due_date
}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to create supplier invoice: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.put("/supplier-invoices/{invoice_id}")
async def update_supplier_invoice(invoice_id: int, data: Dict):
"""Update supplier invoice details"""
try:
# Check if invoice exists
existing = execute_query(
"SELECT id, status FROM supplier_invoices WHERE id = %s",
(invoice_id,))
if not existing:
raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")
existing_invoice = existing[0]
# Don't allow editing if already sent to e-conomic
if existing_invoice['status'] == 'sent_to_economic':
raise HTTPException(
status_code=400,
detail="Cannot edit invoice that has been sent to e-conomic"
)
# Build update query dynamically based on provided fields
update_fields = []
params = []
allowed_fields = ['invoice_number', 'vendor_id', 'vendor_name', 'invoice_date',
'due_date', 'total_amount', 'vat_amount', 'net_amount',
'currency', 'description', 'notes', 'status']
for field in allowed_fields:
if field in data:
update_fields.append(f"{field} = %s")
params.append(data[field])
if not update_fields:
raise HTTPException(status_code=400, detail="No fields to update")
params.append(invoice_id)
query = f"""
UPDATE supplier_invoices
SET {', '.join(update_fields)}, updated_at = CURRENT_TIMESTAMP
WHERE id = %s
"""
execute_update(query, tuple(params))
logger.info(f"✅ Updated supplier invoice {invoice_id}")
return {"success": True, "invoice_id": invoice_id}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to update supplier invoice {invoice_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.patch("/supplier-invoices/{invoice_id}/lines/{line_id}")
async def update_invoice_line(invoice_id: int, line_id: int, data: Dict):
"""
Update supplier invoice line item
Supports updating: contra_account, line_purpose, resale_customer_id, resale_order_number
"""
try:
# Check if invoice exists and is not sent to e-conomic
invoice = execute_query(
"SELECT id, status FROM supplier_invoices WHERE id = %s",
(invoice_id,))
if not invoice:
raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")
invoice_data = invoice[0]
# Don't allow editing if already sent to e-conomic
if invoice_data['status'] == 'sent_to_economic':
raise HTTPException(
status_code=400,
detail="Cannot edit invoice line that has been sent to e-conomic"
)
# Check if line exists
line = execute_query(
"SELECT id FROM supplier_invoice_lines WHERE id = %s AND supplier_invoice_id = %s",
(line_id, invoice_id))
if not line or len(line) == 0:
raise HTTPException(status_code=404, detail=f"Line {line_id} not found in invoice {invoice_id}")
# Build update query
update_fields = []
params = []
allowed_fields = ['contra_account', 'line_purpose', 'resale_customer_id',
'resale_order_number', 'description', 'quantity',
'unit_price', 'vat_rate', 'total_amount']
for field in allowed_fields:
if field in data:
update_fields.append(f"{field} = %s")
params.append(data[field])
if not update_fields:
raise HTTPException(status_code=400, detail="No fields to update")
params.append(line_id)
query = f"""
UPDATE supplier_invoice_lines
SET {', '.join(update_fields)}
WHERE id = %s
"""
execute_update(query, tuple(params))
logger.info(f"✅ Updated invoice line {line_id} (Invoice {invoice_id})")
return {"success": True, "line_id": line_id}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to update invoice line {line_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/supplier-invoices/{invoice_id}")
async def delete_supplier_invoice(invoice_id: int):
"""Delete supplier invoice (soft delete if integrated with e-conomic)"""
try:
invoice = execute_query_single(
"SELECT id, invoice_number, economic_voucher_number FROM supplier_invoices WHERE id = %s",
(invoice_id,))
if not invoice:
raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")
# If sent to e-conomic, only mark as cancelled (don't delete)
if invoice.get('economic_voucher_number'):
execute_update(
"UPDATE supplier_invoices SET status = 'cancelled', updated_at = CURRENT_TIMESTAMP WHERE id = %s",
(invoice_id,)
)
logger.info(f"⚠️ Marked supplier invoice {invoice['invoice_number']} as cancelled (sent to e-conomic)")
return {"success": True, "message": "Invoice marked as cancelled", "invoice_id": invoice_id}
# Otherwise, delete invoice and lines
execute_update("DELETE FROM supplier_invoice_lines WHERE supplier_invoice_id = %s", (invoice_id,))
execute_update("DELETE FROM supplier_invoices WHERE id = %s", (invoice_id,))
logger.info(f"🗑️ Deleted supplier invoice {invoice['invoice_number']} (ID: {invoice_id})")
return {"success": True, "message": "Invoice deleted", "invoice_id": invoice_id}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to delete supplier invoice {invoice_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
# ========== E-CONOMIC INTEGRATION ==========
class ApproveRequest(BaseModel):
    """Request body for invoice approval; identifies the approving user."""
    approved_by: str
@router.post("/supplier-invoices/{invoice_id}/approve")
async def approve_supplier_invoice(invoice_id: int, request: ApproveRequest):
"""Approve supplier invoice for payment"""
try:
invoice = execute_query_single(
"SELECT id, invoice_number, status FROM supplier_invoices WHERE id = %s",
(invoice_id,))
if not invoice:
raise HTTPException(status_code=404, detail=f"Faktura {invoice_id} ikke fundet")
if invoice['status'] != 'pending':
raise HTTPException(status_code=400, detail=f"Faktura har allerede status '{invoice['status']}' - kan kun godkende fakturaer med status 'pending'")
execute_update(
"""UPDATE supplier_invoices
SET status = 'approved', approved_by = %s, approved_at = CURRENT_TIMESTAMP
WHERE id = %s""",
(request.approved_by, invoice_id)
)
logger.info(f"✅ Approved supplier invoice {invoice['invoice_number']} by {request.approved_by}")
return {"success": True, "invoice_id": invoice_id, "approved_by": request.approved_by}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to approve invoice {invoice_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/{invoice_id}/send-to-economic")
async def send_to_economic(invoice_id: int):
"""
Send approved supplier invoice to e-conomic kassekladde
Creates voucher entry in e-conomic journals
"""
try:
# Get invoice with lines
invoice = execute_query_single(
"""SELECT si.*, v.economic_supplier_number as vendor_economic_id, v.name as vendor_full_name
FROM supplier_invoices si
LEFT JOIN vendors v ON si.vendor_id = v.id
WHERE si.id = %s""",
(invoice_id,))
if not invoice:
raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")
if invoice['status'] != 'approved':
raise HTTPException(status_code=400, detail="Invoice must be approved before sending to e-conomic")
if invoice.get('economic_voucher_number'):
raise HTTPException(status_code=400, detail="Invoice already sent to e-conomic")
# Get lines
lines = execute_query_single(
"SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
(invoice_id,)
)
if not lines:
raise HTTPException(status_code=400, detail="Invoice must have at least one line item")
# Check if vendor exists in e-conomic
economic = get_economic_service()
vendor_economic_id = invoice.get('vendor_economic_id')
# If vendor not in e-conomic, create it
if not vendor_economic_id:
vendor_result = await economic.search_supplier_by_name(invoice.get('vendor_full_name') or invoice.get('vendor_name'))
if vendor_result:
vendor_economic_id = vendor_result['supplierNumber']
# Update local vendor record
execute_update(
"UPDATE vendors SET economic_supplier_number = %s WHERE id = %s",
(vendor_economic_id, invoice['vendor_id'])
)
else:
# Create new supplier in e-conomic
new_supplier = await economic.create_supplier({
'name': invoice.get('vendor_full_name') or invoice.get('vendor_name'),
'currency': invoice.get('currency', 'DKK')
})
if new_supplier and new_supplier.get('supplierNumber'):
vendor_economic_id = new_supplier['supplierNumber']
else:
raise HTTPException(status_code=500, detail="Failed to create supplier in e-conomic")
# Get default journal number from settings
journal_setting = execute_query(
"SELECT setting_value FROM supplier_invoice_settings WHERE setting_key = 'economic_default_journal'")
journal_number = int(journal_setting[0]['setting_value']) if journal_setting and len(journal_setting) > 0 else 1
# Build VAT breakdown from lines
vat_breakdown = {}
line_items = []
for line in lines:
vat_code = line.get('vat_code', 'I25')
if vat_code not in vat_breakdown:
vat_breakdown[vat_code] = {
'net': 0,
'vat': 0,
'gross': 0,
'rate': line.get('vat_rate', 25.00)
}
line_total = float(line.get('line_total', 0))
vat_amount = float(line.get('vat_amount', 0))
net_amount = line_total - vat_amount
vat_breakdown[vat_code]['net'] += net_amount
vat_breakdown[vat_code]['vat'] += vat_amount
vat_breakdown[vat_code]['gross'] += line_total
line_items.append({
'description': line.get('description'),
'quantity': float(line.get('quantity', 1)),
'unit_price': float(line.get('unit_price', 0)),
'line_total': line_total,
'vat_code': vat_code,
'vat_amount': vat_amount,
'contra_account': line.get('contra_account', '5810'),
'sku': line.get('sku')
})
# Send to e-conomic
result = await economic.create_journal_supplier_invoice(
journal_number=journal_number,
supplier_number=vendor_economic_id,
invoice_number=invoice['invoice_number'],
invoice_date=invoice['invoice_date'].isoformat() if isinstance(invoice['invoice_date'], date) else invoice['invoice_date'],
total_amount=float(invoice['total_amount']),
vat_breakdown=vat_breakdown,
line_items=line_items,
due_date=invoice['due_date'].isoformat() if invoice.get('due_date') and isinstance(invoice['due_date'], date) else invoice.get('due_date'),
text=invoice.get('description') or f"Supplier invoice {invoice['invoice_number']}"
)
if result.get('error'):
raise HTTPException(status_code=500, detail=result.get('message', 'Failed to create voucher in e-conomic'))
# Update invoice with e-conomic details
execute_update(
"""UPDATE supplier_invoices
SET status = 'sent_to_economic',
economic_supplier_number = %s,
economic_journal_number = %s,
economic_voucher_number = %s,
economic_accounting_year = %s,
sent_to_economic_at = CURRENT_TIMESTAMP
WHERE id = %s""",
(
vendor_economic_id,
result['journal_number'],
result['voucher_number'],
result['accounting_year'],
invoice_id
)
)
# Upload attachment if file_path exists
if invoice.get('file_path') and os.path.exists(invoice['file_path']):
attachment_result = await economic.upload_voucher_attachment(
journal_number=result['journal_number'],
accounting_year=result['accounting_year'],
voucher_number=result['voucher_number'],
pdf_path=invoice['file_path'],
filename=f"{invoice['invoice_number']}.pdf"
)
if attachment_result.get('success'):
logger.info(f"📎 Uploaded attachment for voucher {result['voucher_number']}")
logger.info(f"✅ Sent supplier invoice {invoice['invoice_number']} to e-conomic (voucher #{result['voucher_number']})")
return {
"success": True,
"invoice_id": invoice_id,
"voucher_number": result['voucher_number'],
"journal_number": result['journal_number'],
"accounting_year": result['accounting_year']
}
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ Failed to send invoice {invoice_id} to e-conomic: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/economic/journals")
async def get_economic_journals():
"""Get available e-conomic journals (kassekladder)"""
try:
economic = get_economic_service()
journals = await economic.get_supplier_invoice_journals()
return {"journals": journals}
except Exception as e:
logger.error(f"❌ Failed to get e-conomic journals: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/economic/accounts")
async def get_economic_accounts(refresh: bool = False):
"""
Get e-conomic chart of accounts (kontoplan) from cache
Args:
refresh: If True, fetch fresh data from e-conomic API
Returns:
List of accounts with accountNumber, name, accountType, vatCode
"""
try:
# If refresh requested, sync from e-conomic first
if refresh:
economic = get_economic_service()
count = await economic.sync_accounts_to_database()
logger.info(f"✅ Refreshed {count} accounts from e-conomic")
# Fetch from database cache
accounts = execute_query("""
SELECT
account_number as "accountNumber",
name,
account_type as "accountType",
vat_code as "vatCode",
balance,
last_synced as "lastSynced"
FROM economic_accounts
WHERE is_active = TRUE
ORDER BY account_number
""")
# If no accounts in cache and not already refreshed, try syncing
if not accounts and not refresh:
economic = get_economic_service()
count = await economic.sync_accounts_to_database()
logger.info(f"✅ Initial sync: {count} accounts from e-conomic")
# Retry fetch
accounts = execute_query("""
SELECT
account_number as "accountNumber",
name,
account_type as "accountType",
vat_code as "vatCode",
balance,
last_synced as "lastSynced"
FROM economic_accounts
WHERE is_active = TRUE
ORDER BY account_number
""")
return {"accounts": accounts}
except Exception as e:
logger.error(f"❌ Failed to get e-conomic accounts: {e}")
raise HTTPException(status_code=500, detail=str(e))
# ========== STATISTICS & REPORTS ==========
@router.get("/supplier-invoices/stats/overview")
async def get_payment_overview():
"""
Get overview of supplier invoices payment status
Returns stats for total, paid, overdue, due soon, and pending invoices
"""
try:
today = date.today().isoformat()
stats = execute_query_single("""
SELECT
COUNT(*) as total_count,
SUM(CASE WHEN paid_date IS NOT NULL THEN 1 ELSE 0 END) as paid_count,
SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN 1 ELSE 0 END) as overdue_count,
SUM(CASE WHEN paid_date IS NULL AND due_date >= %s AND due_date <= (%s::date + INTERVAL '7 days') THEN 1 ELSE 0 END) as due_soon_count,
SUM(CASE WHEN paid_date IS NULL AND (due_date IS NULL OR due_date > (%s::date + INTERVAL '7 days')) THEN 1 ELSE 0 END) as pending_count,
SUM(total_amount) as total_amount,
SUM(CASE WHEN paid_date IS NOT NULL THEN total_amount ELSE 0 END) as paid_amount,
SUM(CASE WHEN paid_date IS NULL THEN total_amount ELSE 0 END) as unpaid_amount,
SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN total_amount ELSE 0 END) as overdue_amount
FROM supplier_invoices
WHERE status != 'cancelled'
""", (today, today, today, today, today))
return {
"total_invoices": stats.get('total_count', 0) if stats else 0,
"paid_count": stats.get('paid_count', 0) if stats else 0,
"overdue_count": stats.get('overdue_count', 0) if stats else 0,
"due_soon_count": stats.get('due_soon_count', 0) if stats else 0,
"pending_count": stats.get('pending_count', 0) if stats else 0,
"total_amount": float(stats.get('total_amount', 0) or 0) if stats else 0,
"paid_amount": float(stats.get('paid_amount', 0) or 0) if stats else 0,
"unpaid_amount": float(stats.get('unpaid_amount', 0) or 0) if stats else 0,
"overdue_amount": float(stats.get('overdue_amount', 0) or 0) if stats else 0
}
except Exception as e:
logger.error(f"❌ Failed to get payment overview: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/stats/by-vendor")
async def get_stats_by_vendor():
"""Get supplier invoice statistics grouped by vendor"""
try:
stats = execute_query_single("""
SELECT
v.id as vendor_id,
v.name as vendor_name,
COUNT(si.id) as invoice_count,
SUM(si.total_amount) as total_amount,
SUM(CASE WHEN si.paid_date IS NULL THEN si.total_amount ELSE 0 END) as unpaid_amount,
MAX(si.due_date) as latest_due_date
FROM vendors v
LEFT JOIN supplier_invoices si ON v.id = si.vendor_id
WHERE si.status != 'cancelled' OR si.status IS NULL
GROUP BY v.id, v.name
HAVING COUNT(si.id) > 0
ORDER BY unpaid_amount DESC
""")
return {"vendor_stats": stats}
except Exception as e:
logger.error(f"❌ Failed to get vendor stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
# ========== UPLOAD & AI EXTRACTION ==========
@router.post("/supplier-invoices/upload")
async def upload_supplier_invoice(file: UploadFile = File(...)):
    """
    Upload a supplier invoice file (PDF/image) and register it for batch analysis.

    Process:
    1. Validate file extension against settings.ALLOWED_EXTENSIONS
    2. Stream the upload to a temp file while enforcing the size limit
    3. Calculate SHA256 checksum for duplicate detection
    4. If the checksum is already known, report it as a duplicate
       (including the invoice linked via extractions, if any)
    5. Otherwise persist the file under a collision-free name and insert
       an 'incoming_files' row with status 'pending'

    NOTE: no text extraction / template matching / AI happens here —
    that is deferred to the batch-analyze endpoint ("Analyser alle").

    Returns:
        {"status": "uploaded", "file_id": int, "filename": str, "message": str}
        or {"status": "duplicate", "file_id": int, "invoice_id": int | None, ...}

    Raises:
        HTTPException(400): file extension not allowed
        HTTPException(413): file larger than EMAIL_MAX_UPLOAD_SIZE_MB
        HTTPException(500): any other failure
    """
    from app.core.config import settings
    try:
        # Validate file extension
        suffix = Path(file.filename).suffix.lower()
        suffix_clean = suffix.lstrip('.')
        # Build allowed set — guard against pydantic parsing CSV as a single element
        raw = settings.ALLOWED_EXTENSIONS
        if len(raw) == 1 and ',' in raw[0]:
            raw = [e.strip() for e in raw[0].split(',')]
        allowed_clean = {ext.lower().lstrip('.') for ext in raw}
        if suffix_clean not in allowed_clean:
            raise HTTPException(
                status_code=400,
                detail=f"Filtype {suffix} ikke tilladt. Tilladte: {', '.join(sorted(allowed_clean))}"
            )
        # Create upload directory
        upload_dir = Path(settings.UPLOAD_DIR)
        upload_dir.mkdir(parents=True, exist_ok=True)
        # Save file temporarily to calculate checksum
        temp_path = upload_dir / f"temp_{datetime.now().timestamp()}_{file.filename}"
        try:
            # Validate file size while saving
            max_size = settings.EMAIL_MAX_UPLOAD_SIZE_MB * 1024 * 1024
            total_size = 0
            with open(temp_path, "wb") as buffer:
                # Stream in 8KB chunks so a large upload never sits fully in memory.
                while chunk := await file.read(8192):
                    total_size += len(chunk)
                    if total_size > max_size:
                        # NOTE(review): unlink happens while the file is still
                        # open for writing — fine on POSIX, would fail on
                        # Windows. Confirm deployment target.
                        temp_path.unlink(missing_ok=True)
                        raise HTTPException(
                            status_code=413,
                            detail=f"Fil for stor (max {settings.EMAIL_MAX_UPLOAD_SIZE_MB}MB)"
                        )
                    buffer.write(chunk)
            logger.info(f"📥 Uploaded file: {file.filename} ({total_size} bytes)")
            # Calculate SHA256 checksum
            checksum = ollama_service.calculate_file_checksum(temp_path)
            # Check for duplicate file
            existing_file = execute_query_single(
                "SELECT file_id, status FROM incoming_files WHERE checksum = %s",
                (checksum,))
            if existing_file:
                # Duplicate upload: drop the temp copy and point the caller at
                # the invoice already created from this file (if any).
                temp_path.unlink(missing_ok=True)
                logger.warning(f"⚠️ Duplicate file detected: {checksum[:16]}...")
                # Get existing invoice if linked
                existing_invoice = execute_query_single(
                    """SELECT si.* FROM supplier_invoices si
                    JOIN extractions e ON si.extraction_id = e.extraction_id
                    WHERE e.file_id = %s""",
                    (existing_file['file_id'],))
                return {
                    "status": "duplicate",
                    "message": "Denne fil er allerede uploadet",
                    "file_id": existing_file['file_id'],
                    "invoice_id": existing_invoice['id'] if existing_invoice else None
                }
            # Rename to permanent name; append _1, _2, ... until the name is unique.
            final_path = upload_dir / file.filename
            counter = 1
            while final_path.exists():
                final_path = upload_dir / f"{final_path.stem}_{counter}{final_path.suffix}"
                counter += 1
            temp_path.rename(final_path)
            logger.info(f"💾 Saved file as: {final_path.name}")
            # Insert file record
            file_record = execute_query_single(
                """INSERT INTO incoming_files
                (filename, original_filename, file_path, file_size, mime_type, checksum, status)
                VALUES (%s, %s, %s, %s, %s, %s, 'pending') RETURNING file_id""",
                (final_path.name, file.filename, str(final_path), total_size,
                ollama_service._get_mime_type(final_path), checksum))
            file_id = file_record['file_id']
            logger.info(f"✅ File uploaded successfully - ready for batch analysis")
            # Return simple response - all extraction happens in batch analyze
            return {
                "status": "uploaded",
                "file_id": file_id,
                "filename": file.filename,
                "message": "Fil uploadet - klik 'Analyser alle' for at behandle"
            }
        except HTTPException as he:
            # Mark file as failed if we have file_id
            # ('file_id' in locals() is true only after the INSERT above succeeded)
            if 'file_id' in locals():
                execute_update(
                    """UPDATE incoming_files
                    SET status = 'failed',
                    error_message = %s,
                    processed_at = CURRENT_TIMESTAMP
                    WHERE file_id = %s""",
                    (str(he.detail), file_id)
                )
            raise
        except Exception as e:
            logger.error(f"❌ Upload failed (inner): {e}", exc_info=True)
            # Mark file as failed if we have file_id
            if 'file_id' in locals():
                execute_update(
                    """UPDATE incoming_files
                    SET status = 'failed',
                    error_message = %s,
                    processed_at = CURRENT_TIMESTAMP
                    WHERE file_id = %s""",
                    (str(e), file_id)
                )
            raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Upload failed (outer): {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
# ========== ECONOMIC SYNC ==========
@router.post("/supplier-invoices/{invoice_id}/send-to-economic")
async def send_invoice_to_economic(invoice_id: int):
    """Push a supplier invoice to e-conomic.

    Not implemented yet: always responds 501 so callers can detect the
    missing integration explicitly.
    """
    # Detail text kept verbatim (Danish) — clients may display it directly.
    not_implemented = HTTPException(status_code=501, detail="e-conomic integration kommer senere")
    raise not_implemented
@router.post("/supplier-invoices/reprocess/{file_id}")
async def reprocess_uploaded_file(file_id: int):
    """
    Re-process an uploaded file with template matching.

    Used for files that failed or were never fully processed. Pipeline:
    1. Load the incoming_files record and verify the file still exists on disk
    2. Extract text from the file (via ollama_service)
    3. Try template matching (custom DB templates, or invoice2data when
       template_id == -1)
    4. On a match (confidence >= 0.5): extract fields + line items, store an
       'extractions' row and set file status 'processed'
    5. Otherwise fall back to full AI (Ollama) extraction, store the result
       and set status 'ai_extracted'; on AI failure the file is marked 'failed'

    Returns a summary dict with extracted fields, matched template id,
    vendor_id, confidence and the (possibly truncated) pdf text.

    Raises:
        HTTPException(404): file record or file on disk missing
        HTTPException(500): extraction failure
    """
    import json
    from datetime import datetime, timedelta
    try:
        # Get file record
        file_record = execute_query_single(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,))
        if not file_record:
            raise HTTPException(status_code=404, detail=f"Fil {file_id} ikke fundet")
        file_path = Path(file_record['file_path'])
        if not file_path.exists():
            raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}")
        # NOTE(review): the leading characters of this log message are mojibake
        # (a corrupted emoji) — cosmetic only, kept byte-identical.
        logger.info(f"�� Genbehandler fil {file_id}: {file_record['filename']}")
        # Extract text from file
        text = await ollama_service._extract_text_from_file(file_path)
        # Try template matching
        template_id, confidence = template_service.match_template(text)
        extracted_fields = {}
        vendor_id = None
        if template_id and confidence >= 0.5:
            logger.info(f"✅ Matched template {template_id} ({confidence:.0%})")
            extracted_fields = template_service.extract_fields(text, template_id)
            # Check if this is an invoice2data template (ID -1)
            is_invoice2data = (template_id == -1)
            if is_invoice2data:
                def _to_numeric(value):
                    # Normalize template output to float; handles Danish
                    # number formatting like "25.000,00" -> 25000.0.
                    if value is None:
                        return None
                    if isinstance(value, (int, float, Decimal)):
                        return float(value)
                    if not isinstance(value, str):
                        return None
                    cleaned = value.strip().replace(' ', '')
                    if not cleaned:
                        return None
                    # Common Danish formatting: 25.000,00 or 1.530,00
                    if ',' in cleaned:
                        cleaned = cleaned.replace('.', '').replace(',', '.')
                    try:
                        return float(cleaned)
                    except ValueError:
                        return None
                def _clean_document_id(value):
                    # Treat empty strings and the literal "none" as missing.
                    if value is None:
                        return None
                    if isinstance(value, str):
                        cleaned = value.strip()
                        return cleaned if cleaned and cleaned.lower() != 'none' else None
                    return str(value)
                # Invoice2data doesn't have vendor in cache
                logger.info(f"📋 Using invoice2data template")
                # Try to find vendor from extracted CVR
                if extracted_fields.get('vendor_vat'):
                    vendor = execute_query_single(
                        "SELECT id FROM vendors WHERE cvr_number = %s",
                        (extracted_fields['vendor_vat'],))
                    if vendor:
                        vendor_id = vendor['id']
                # Fallback: use vendor detected during quick analysis (incoming_files.detected_vendor_id)
                if vendor_id is None:
                    vendor_id = file_record.get('detected_vendor_id')
                # Fallback: match by issuer name
                if vendor_id is None and extracted_fields.get('issuer'):
                    vendor = execute_query_single(
                        "SELECT id FROM vendors WHERE name ILIKE %s ORDER BY id LIMIT 1",
                        (extracted_fields['issuer'],))
                    if vendor:
                        vendor_id = vendor['id']
                # Store invoice2data extraction in database
                extraction_id = execute_insert(
                    """INSERT INTO extractions
                    (file_id, vendor_matched_id, vendor_name, vendor_cvr,
                    document_id, document_date, due_date, document_type, document_type_detected,
                    total_amount, currency, confidence, llm_response_json, status)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    RETURNING extraction_id""",
                    (file_id, vendor_id,
                    extracted_fields.get('issuer'),  # vendor_name
                    extracted_fields.get('vendor_vat'),  # vendor_cvr
                    _clean_document_id(extracted_fields.get('invoice_number')),  # document_id
                    extracted_fields.get('invoice_date'),  # document_date
                    extracted_fields.get('due_date'),
                    'invoice',  # document_type
                    'invoice',  # document_type_detected
                    _to_numeric(extracted_fields.get('amount_total') if extracted_fields.get('amount_total') is not None else extracted_fields.get('total_amount')),
                    extracted_fields.get('currency', 'DKK'),
                    1.0,  # invoice2data always 100% confidence
                    json.dumps(extracted_fields),  # llm_response_json
                    'extracted')  # status
                )
                # Insert line items if extracted
                if extracted_fields.get('lines'):
                    for idx, line in enumerate(extracted_fields['lines'], start=1):
                        line_total = _to_numeric(line.get('line_total'))
                        unit_price = _to_numeric(line.get('unit_price'))
                        quantity = _to_numeric(line.get('quantity'))
                        execute_insert(
                            """INSERT INTO extraction_lines
                            (extraction_id, line_number, description, quantity, unit_price,
                            line_total, vat_rate, vat_note, confidence,
                            ip_address, contract_number, location_street, location_zip, location_city)
                            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                            RETURNING line_id""",
                            (extraction_id, idx, line.get('description'),
                            quantity, unit_price,
                            line_total, None, None, 1.0,
                            line.get('ip_address'), line.get('contract_number'),
                            line.get('location_street'), line.get('location_zip'), line.get('location_city'))
                        )
                    logger.info(f"✅ Saved {len(extracted_fields['lines'])} line items")
            else:
                # Custom template from database
                template = template_service.templates_cache.get(template_id)
                if template:
                    vendor_id = template.get('vendor_id')
                template_service.log_usage(template_id, file_id, True, confidence, extracted_fields)
            # Update file - use NULL for invoice2data templates to avoid FK constraint
            db_template_id = None if is_invoice2data else template_id
            execute_update(
                """UPDATE incoming_files
                SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP
                WHERE file_id = %s""",
                (db_template_id, file_id)
            )
        else:
            # FALLBACK TO AI EXTRACTION
            logger.info(f"⚠️ Ingen template match (confidence: {confidence:.0%}) - bruger AI extraction")
            # Use detected vendor from quick analysis if available
            vendor_id = file_record.get('detected_vendor_id')
            # Call Ollama for full extraction
            logger.info(f"🤖 Calling Ollama for AI extraction...")
            llm_result = await ollama_service.extract_from_text(text)
            # Handle both dict and string error responses
            if not llm_result or isinstance(llm_result, str) or (isinstance(llm_result, dict) and 'error' in llm_result):
                if isinstance(llm_result, dict):
                    error_msg = llm_result.get('error', 'AI extraction fejlede')
                elif isinstance(llm_result, str):
                    error_msg = llm_result  # Error message returned as string
                else:
                    error_msg = 'AI extraction fejlede'
                logger.error(f"❌ AI extraction failed: {error_msg}")
                execute_update(
                    """UPDATE incoming_files
                    SET status = 'failed',
                    error_message = %s,
                    processed_at = CURRENT_TIMESTAMP
                    WHERE file_id = %s""",
                    (f"AI extraction fejlede: {error_msg}", file_id)
                )
                raise HTTPException(status_code=500, detail=f"AI extraction fejlede: {error_msg}")
            extracted_fields = llm_result
            confidence = llm_result.get('confidence', 0.75)
            # Post-process: clear own CVR(s) if AI mistakenly returned them
            extracted_cvr = llm_result.get('vendor_cvr')
            own_cvr = getattr(settings, 'OWN_CVR', '29522790')
            OWN_CVRS = {str(own_cvr).strip(), '29522790', '14416285'}  # all BMC CVR numbers
            extracted_cvr_clean = str(extracted_cvr).replace('DK', '').strip() if extracted_cvr else ''
            if extracted_cvr_clean and extracted_cvr_clean in OWN_CVRS:
                logger.warning(f"⚠️ AI returned own CVR ({extracted_cvr_clean}) as vendor_cvr - clearing it")
                llm_result['vendor_cvr'] = None
                extracted_cvr = None
            # Also clear vendor_name if it looks like BMC
            vendor_name = llm_result.get('vendor_name', '') or ''
            if 'BMC' in vendor_name.upper() and 'DENMARK' in vendor_name.upper():
                logger.warning(f"⚠️ AI returned own company name '{vendor_name}' as vendor_name - clearing it")
                llm_result['vendor_name'] = None
            # Try to find vendor in DB by extracted CVR or name (overrides detected_vendor_id)
            if extracted_cvr:
                cvr_clean = str(extracted_cvr).replace('DK', '').strip()
                vendor_row = execute_query_single(
                    "SELECT id FROM vendors WHERE cvr_number = %s AND is_active = true",
                    (cvr_clean,))
                if vendor_row:
                    vendor_id = vendor_row['id']
                    logger.info(f"✅ Matched vendor by CVR {cvr_clean}: vendor_id={vendor_id}")
                    execute_update(
                        "UPDATE incoming_files SET detected_vendor_id = %s WHERE file_id = %s",
                        (vendor_id, file_id))
            if not vendor_id and llm_result.get('vendor_name'):
                vendor_row = execute_query_single(
                    "SELECT id FROM vendors WHERE name ILIKE %s AND is_active = true ORDER BY id LIMIT 1",
                    (f"%{llm_result['vendor_name']}%",))
                if vendor_row:
                    vendor_id = vendor_row['id']
                    logger.info(f"✅ Matched vendor by name '{llm_result['vendor_name']}': vendor_id={vendor_id}")
                    execute_update(
                        "UPDATE incoming_files SET detected_vendor_id = %s WHERE file_id = %s",
                        (vendor_id, file_id))
            # Store AI extracted data in extractions table
            extraction_id = execute_insert(
                """INSERT INTO extractions
                (file_id, vendor_matched_id, vendor_name, vendor_cvr,
                document_id, document_date, due_date,
                total_amount, currency, document_type, document_type_detected,
                confidence, llm_response_json, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING extraction_id""",
                (file_id, vendor_id,
                llm_result.get('vendor_name'),
                llm_result.get('vendor_cvr'),
                llm_result.get('invoice_number'),
                llm_result.get('invoice_date'),
                llm_result.get('due_date'),
                llm_result.get('total_amount'),
                llm_result.get('currency', 'DKK'),
                llm_result.get('document_type', 'invoice'),
                llm_result.get('document_type', 'invoice'),
                confidence,
                json.dumps(llm_result),
                'extracted')
            )
            # Insert line items if extracted
            if llm_result.get('lines'):
                for idx, line in enumerate(llm_result['lines'], start=1):
                    execute_insert(
                        """INSERT INTO extraction_lines
                        (extraction_id, line_number, description, quantity, unit_price,
                        line_total, vat_rate, confidence)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                        RETURNING line_id""",
                        (extraction_id, idx, line.get('description'),
                        line.get('quantity'), line.get('unit_price'),
                        line.get('line_total'), line.get('vat_rate'),
                        confidence)
                    )
            # Update file status to ai_extracted
            execute_update(
                """UPDATE incoming_files
                SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP
                WHERE file_id = %s""",
                (file_id,)
            )
            logger.info(f"✅ AI extraction completed for file {file_id}")
        # Return success with template data or AI extraction result
        # Determine confidence value safely
        # ('llm_result' only exists when the AI-fallback branch ran)
        if template_id:
            final_confidence = confidence
        elif 'llm_result' in locals() and isinstance(llm_result, dict):
            final_confidence = llm_result.get('confidence', 0.75)
        else:
            final_confidence = 0.0
        result = {
            "status": "success",
            "file_id": file_id,
            "filename": file_record['filename'],
            "template_matched": template_id is not None,
            "template_id": template_id,
            "vendor_id": vendor_id,
            "confidence": final_confidence,
            # Full raw text on a template match; only a 1000-char preview otherwise.
            "pdf_text": text[:1000] if not template_id else text
        }
        # Add warning if no template exists
        if not template_id and vendor_id:
            vendor = execute_query_single(
                "SELECT name FROM vendors WHERE id = %s",
                (vendor_id,))
            if vendor:
                result["warning"] = f"⚠️ Ingen template fundet for {vendor['name']} - brugte AI extraction (langsommere)"
        return result
    except HTTPException:
        raise
    except Exception as e:
        # NOTE(review): the original exception is not logged before being
        # wrapped here — consider adding logger.error for easier debugging.
        raise HTTPException(status_code=500, detail=f"Genbehandling fejlede: {str(e)}")
@router.post("/supplier-invoices/files/batch-analyze")
async def batch_analyze_files(background_tasks: BackgroundTasks):
    """
    Kick off AI analysis of all unprocessed files in the background.

    Returns immediately; up to 100 pending/failed files are re-queued and
    handled one by one by a FastAPI background task. The per-file counters
    in the response are placeholders (always 0) since work is asynchronous.
    """
    rows = execute_query(
        """SELECT file_id, filename FROM incoming_files
        WHERE status IN ('pending', 'requires_vendor_selection', 'uploaded', 'failed')
        ORDER BY uploaded_at DESC
        LIMIT 100""",
        ()
    )
    if not rows:
        return {"started": 0, "message": "Ingen filer at behandle"}
    queued_ids = [row['file_id'] for row in rows]
    logger.info(f"🚀 Batch-analyse startet for {len(queued_ids)} filer")
    async def _process_queue(ids):
        # Sequential processing; a single failure never stops the batch.
        succeeded, failed = 0, 0
        for fid in ids:
            try:
                await reprocess_uploaded_file(fid)
            except Exception as ex:
                logger.error(f"❌ Batch fejl file {fid}: {ex}")
                failed += 1
            else:
                succeeded += 1
        logger.info(f"✅ Batch færdig: {succeeded} ok, {failed} fejlet")
    background_tasks.add_task(_process_queue, queued_ids)
    return {
        "started": len(queued_ids),
        "message": f"{len(queued_ids)} filer sendt til analyse i baggrunden. Opdater siden om lidt.",
        "analyzed": 0,
        "requires_vendor_selection": 0,
        "failed": 0
    }
@router.put("/supplier-invoices/templates/{template_id}")
async def update_template(
    template_id: int,
    template_name: Optional[str] = None,
    detection_patterns: Optional[List[Dict]] = None,
    field_mappings: Optional[Dict] = None,
    default_product_category: Optional[str] = None,
    is_active: Optional[bool] = None
):
    """Partially update an existing supplier-invoice template.

    Only provided fields are written; JSON columns are serialized before
    storage. Reloads the in-memory template cache on success.

    Raises:
        HTTPException(400): no fields to update were supplied
        HTTPException(500): database failure
    """
    try:
        import json
        set_clauses = []
        values = []
        # Truthiness check kept deliberately: an empty-string name is ignored.
        if template_name:
            set_clauses.append("template_name = %s")
            values.append(template_name)
        if detection_patterns is not None:
            set_clauses.append("detection_patterns = %s")
            values.append(json.dumps(detection_patterns))
        if field_mappings is not None:
            set_clauses.append("field_mappings = %s")
            values.append(json.dumps(field_mappings))
        if default_product_category is not None:
            set_clauses.append("default_product_category = %s")
            values.append(default_product_category)
        if is_active is not None:
            set_clauses.append("is_active = %s")
            values.append(is_active)
        if not set_clauses:
            raise HTTPException(status_code=400, detail="Ingen opdateringer angivet")
        # Always bump updated_at; template_id is the final WHERE parameter.
        set_clauses.append("updated_at = CURRENT_TIMESTAMP")
        values.append(template_id)
        execute_update(
            f"UPDATE supplier_invoice_templates SET {', '.join(set_clauses)} WHERE template_id = %s",
            tuple(values)
        )
        # Refresh the cached templates so the change is visible immediately.
        template_service.reload_templates()
        logger.info(f"✅ Template {template_id} opdateret")
        return {"message": "Template opdateret"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update template: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def _keyword_detection(keywords: List, pdf_text: str) -> tuple:
    """Scan pdf_text for each template keyword (case-insensitive substring).

    Returns (detection_results, matched_count) where detection_results is a
    list of dicts in the shape the template-test endpoints return.
    """
    text_lower = pdf_text.lower()
    results = []
    matched_count = 0
    for keyword in keywords:
        found = str(keyword).lower() in text_lower
        if found:
            matched_count += 1
        results.append({
            "pattern": str(keyword),
            "type": "keyword",
            "found": found,
            "weight": 0.5
        })
    return results, matched_count


@router.post("/supplier-invoices/templates/invoice2data/{template_name}/test")
async def test_invoice2data_template(template_name: str, request: Dict):
    """
    Test an invoice2data YAML template against raw PDF text.

    Request body:
    {
        "pdf_text": "Full PDF text content..."
    }

    Returns the same shape as the test_template endpoint:
    matched / confidence / extracted_fields / line_items / detection_results
    (plus an "error" key when the template does not match).

    Raises:
        HTTPException(400): missing pdf_text
        HTTPException(404): unknown template name
        HTTPException(500): unexpected failure
    """
    try:
        pdf_text = request.get('pdf_text', '')
        if not pdf_text:
            raise HTTPException(status_code=400, detail="pdf_text er påkrævet")
        # Get invoice2data service
        invoice2data_service = get_invoice2data_service()
        # Check if template exists
        if template_name not in invoice2data_service.templates:
            raise HTTPException(status_code=404, detail=f"Template '{template_name}' ikke fundet")
        template_data = invoice2data_service.templates[template_name]
        # Test extraction
        result = invoice2data_service.extract_with_template(pdf_text, template_name)
        # Keyword detection is reported in both the matched and unmatched
        # cases — compute it once (previously this loop was duplicated).
        keywords = template_data.get('keywords', [])
        detection_results, matched_count = _keyword_detection(keywords, pdf_text)
        if not result:
            # Template didn't match
            return {
                "matched": False,
                "confidence": 0.0,
                "extracted_fields": {},
                "line_items": [],
                "detection_results": detection_results,
                "template_name": template_name,
                "error": "Template matchede ikke PDF'en"
            }
        # Extract line items
        line_items = []
        if 'lines' in result:
            for line in result['lines']:
                line_items.append({
                    "line_number": line.get('line_number', ''),
                    "item_number": line.get('item_number', ''),
                    "description": line.get('description_raw', '') or line.get('description', ''),
                    "quantity": line.get('quantity', ''),
                    "unit_price": line.get('unit_price', ''),
                    "line_total": line.get('line_total', ''),
                    # Context fields (circuit/location info)
                    "circuit_id": line.get('circuit_id', ''),
                    "ip_address": line.get('ip_address', ''),
                    "contract_number": line.get('contract_number', ''),
                    "location_street": line.get('location_street', ''),
                    "location_zip": line.get('location_zip', ''),
                    "location_city": line.get('location_city', ''),
                })
        # Confidence = fraction of keywords found (1.0 when none are declared).
        confidence = matched_count / len(keywords) if keywords else 1.0
        # Remove 'lines' from extracted_fields to avoid duplication
        extracted_fields = {k: v for k, v in result.items() if k != 'lines'}
        return {
            "matched": True,
            "confidence": confidence,
            "extracted_fields": extracted_fields,
            "line_items": line_items,
            "detection_results": detection_results,
            "template_name": template_name
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Invoice2data template test failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/templates/{template_id}/test")
async def test_template(template_id: int, request: Dict):
    """
    Test a stored (database) template against raw PDF text.

    Request body:
    {
        "pdf_text": "Full PDF text content..."
    }

    Returns:
    {
        "matched": true/false,
        "confidence": 0.85,
        "extracted_fields": {
            "invoice_number": "12345",
            "invoice_date": "01/12-25",
            "total_amount": "1234.56",
            "vendor_cvr": "12345678"
        },
        "line_items": [...],
        "detection_results": [
            {"pattern": "BMC Denmark ApS", "found": true, "weight": 0.5}
        ]
    }

    Raises:
        HTTPException(400): missing pdf_text
        HTTPException(404): unknown template_id
        HTTPException(500): unexpected failure
    """
    try:
        import re
        import json
        pdf_text = request.get('pdf_text', '')
        if not pdf_text:
            raise HTTPException(status_code=400, detail="pdf_text er påkrævet")
        # Fetch template
        query = "SELECT * FROM supplier_invoice_templates WHERE template_id = %s"
        template = execute_query(query, (template_id,))
        if not template:
            raise HTTPException(status_code=404, detail="Template ikke fundet")
        template = template[0]
        detection_patterns = template.get('detection_patterns', [])
        field_mappings = template.get('field_mappings', {})
        # Test detection patterns: weighted score of matched patterns.
        total_score = 0.0
        max_score = 0.0
        detection_results = []
        for pattern in detection_patterns:
            pattern_type = pattern.get('type', 'text')
            pattern_value = pattern.get('pattern', '')
            weight = float(pattern.get('weight', 0.5))
            max_score += weight
            found = False
            # NOTE: only plain-text (substring) patterns can match here;
            # any other pattern type still adds to max_score but never scores.
            if pattern_type == 'text' and pattern_value in pdf_text:
                found = True
                total_score += weight
            detection_results.append({
                "pattern": pattern_value,
                "type": pattern_type,
                "found": found,
                "weight": weight
            })
        confidence = (total_score / max_score) if max_score > 0 else 0.0
        matched = confidence >= 0.7  # Match threshold
        # Extract fields if matched
        extracted_fields = {}
        if matched:
            for field_name, field_config in field_mappings.items():
                pattern = field_config.get('pattern', '')
                group = field_config.get('group', 1)
                # Skip non-field patterns (lines_start, lines_end, line_item)
                if field_name in ['lines_start', 'lines_end', 'line_item']:
                    continue
                try:
                    match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE)
                    if match and len(match.groups()) >= group:
                        extracted_fields[field_name] = match.group(group).strip()
                except Exception as e:
                    # A bad regex for one field must not abort the whole test.
                    logger.warning(f"Pattern match failed for {field_name}: {e}")
        # Extract line items if matched
        line_items = []
        if matched:
            # Extract line items using smart extraction
            lines_start = field_mappings.get('lines_start', {}).get('pattern')
            lines_end = field_mappings.get('lines_end', {}).get('pattern')
            line_pattern = field_mappings.get('line_item', {}).get('pattern')
            line_fields = field_mappings.get('line_item', {}).get('fields', [])
            if line_pattern or lines_start:
                # Narrow pdf_text to the section between start/end markers.
                text_section = pdf_text
                if lines_start:
                    try:
                        start_match = re.search(lines_start, pdf_text, re.IGNORECASE)
                        if start_match:
                            text_section = pdf_text[start_match.end():]
                            logger.debug(f"Found lines_start at position {start_match.end()}")
                    except Exception as e:
                        logger.warning(f"Failed to find lines_start: {e}")
                if lines_end:
                    try:
                        end_match = re.search(lines_end, text_section, re.IGNORECASE)
                        if end_match:
                            text_section = text_section[:end_match.start()]
                            logger.debug(f"Found lines_end at position {end_match.start()}")
                    except Exception as e:
                        logger.warning(f"Failed to find lines_end: {e}")
                # Try pattern first, then smart extraction
                if line_pattern:
                    try:
                        for match in re.finditer(line_pattern, text_section, re.MULTILINE):
                            line_data = {
                                'line_number': len(line_items) + 1,
                                'raw_text': match.group(0)
                            }
                            # Map regex capture groups onto configured field names.
                            for idx, field_name in enumerate(line_fields, start=1):
                                if idx <= len(match.groups()):
                                    line_data[field_name] = match.group(idx).strip()
                            line_items.append(line_data)
                    except Exception as e:
                        logger.error(f"❌ Pattern extraction failed: {e}")
                # Fallback to smart extraction if no lines found
                if not line_items:
                    logger.info("🧠 Trying smart extraction...")
                    logger.debug(f"Text section length: {len(text_section)}, first 500 chars: {text_section[:500]}")
                    line_items = _smart_extract_lines(text_section)
                    logger.info(f"🧠 Smart extraction returned {len(line_items)} items")
                if line_items:
                    logger.info(f"📦 Extracted {len(line_items)} line items from test")
                else:
                    logger.warning(f"⚠️ No line items matched. Section length: {len(text_section)} chars")
                    logger.debug(f"Section preview: {text_section[:300]}")
        logger.info(f"🧪 Template {template_id} test: matched={matched}, confidence={confidence:.2f}, lines={len(line_items)}")
        return {
            "matched": matched,
            "confidence": round(confidence, 2),
            "extracted_fields": extracted_fields,
            "line_items": line_items,
            "detection_results": detection_results,
            "template_name": template.get('template_name', '')
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Template test failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.put("/supplier-invoices/templates/invoice2data/{template_name}/category")
async def update_yaml_category(template_name: str, request: Dict):
    """
    Update default_product_category in an invoice2data YAML template file.

    Request body:
    {
        "category": "drift"  // one of: varesalg, drift, anlæg, abonnement, lager, udlejning
    }

    Raises:
        HTTPException(400): missing or invalid category
        HTTPException(404): YAML file not found
        HTTPException(500): read/write failure
    """
    try:
        import yaml
        from pathlib import Path
        new_category = request.get('category')
        if not new_category:
            raise HTTPException(status_code=400, detail="category er påkrævet")
        # Validate category
        valid_categories = ['varesalg', 'drift', 'anlæg', 'abonnement', 'lager', 'udlejning']
        if new_category not in valid_categories:
            raise HTTPException(status_code=400, detail=f"Ugyldig kategori. Skal være en af: {', '.join(valid_categories)}")
        # Find YAML file under the project root: <root>/data/invoice_templates.
        # NOTE(review): get_yaml_content resolves the same directory relative to
        # the CWD instead — the two endpoints may disagree; verify.
        templates_dir = Path(__file__).parent.parent.parent.parent / 'data' / 'invoice_templates'
        yaml_file = templates_dir / f"{template_name}.yml"
        if not yaml_file.exists():
            raise HTTPException(status_code=404, detail=f"YAML fil ikke fundet: {template_name}.yml")
        # Load YAML
        with open(yaml_file, 'r', encoding='utf-8') as f:
            template_data = yaml.safe_load(f)
        # Update category
        template_data['default_product_category'] = new_category
        # Rewrite the file. NOTE: yaml.dump does NOT preserve the original
        # layout or comments — the file is re-serialized (sort_keys=False keeps
        # key order, allow_unicode keeps Danish characters readable).
        with open(yaml_file, 'w', encoding='utf-8') as f:
            yaml.dump(template_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        # Reload invoice2data service to pick up changes.
        # HACK: calling __init__ on the existing singleton re-runs template
        # loading; a dedicated reload() method would be cleaner.
        invoice2data_service = get_invoice2data_service()
        invoice2data_service.__init__()  # Reinitialize to reload templates
        logger.info(f"✅ Updated category for {template_name}.yml to {new_category}")
        return {
            "message": "Kategori opdateret",
            "template_name": template_name,
            "new_category": new_category
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update YAML category: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/templates/invoice2data/{template_name}/content")
async def get_yaml_content(template_name: str):
    """Return the raw YAML source of an invoice2data template file.

    Returns:
        {"template_name": str, "filename": str, "content": str}

    Raises:
        HTTPException(404): template file does not exist
        HTTPException(500): unexpected read failure
    """
    try:
        from pathlib import Path
        # Resolve the template file relative to the working directory.
        yaml_path = Path("data/invoice_templates") / f"{template_name}.yml"
        if not yaml_path.exists():
            raise HTTPException(status_code=404, detail=f"Template fil ikke fundet: {template_name}.yml")
        raw_yaml = yaml_path.read_text(encoding='utf-8')
        return {
            "template_name": template_name,
            "filename": f"{template_name}.yml",
            "content": raw_yaml
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to read YAML content: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/supplier-invoices/templates/{template_id}")
async def delete_template(template_id: int):
    """Soft-delete a template by flagging it inactive (is_active = false).

    The row is kept for history; the in-memory template cache is reloaded
    so the template stops matching immediately.
    """
    try:
        deactivate_sql = "UPDATE supplier_invoice_templates SET is_active = false WHERE template_id = %s"
        execute_update(deactivate_sql, (template_id,))
        template_service.reload_templates()
        logger.info(f"✅ Template {template_id} deaktiveret")
        return {"message": "Template slettet"}
    except Exception as e:
        logger.error(f"❌ Failed to delete template: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# Helper function for creating invoice from file
async def create_invoice_from_file(file_id: int, vendor_id: int) -> int:
    """Create a placeholder supplier invoice for an uploaded file.

    No extraction is performed: the record gets a temporary invoice number
    ("PENDING-<file_id>"), today's date, a due date 30 days out and a zero
    amount — all meant to be corrected manually afterwards.

    Returns:
        The new supplier_invoices.id.

    Raises:
        ValueError: if the incoming_files record does not exist.
    """
    try:
        row = execute_query_single(
            "SELECT filename, file_path FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )
        if not row:
            raise ValueError(f"File {file_id} not found")
        # Placeholder dates: invoiced today, due in 30 days.
        issued_on = datetime.now().date()
        new_invoice_id = execute_insert(
            """INSERT INTO supplier_invoices (
                vendor_id, invoice_number, invoice_date, due_date,
                total_amount, currency, status, notes
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            RETURNING id""",
            (
                vendor_id,
                f"PENDING-{file_id}",
                issued_on,
                issued_on + timedelta(days=30),
                0.00,
                'DKK',
                'unpaid',
                f"Oprettet fra fil: {row['filename']} (file_id: {file_id})"
            )
        )
        logger.info(f"✅ Created minimal invoice {new_invoice_id} for file {file_id}, vendor {vendor_id}")
        return new_invoice_id
    except Exception as e:
        logger.error(f"❌ Failed to create invoice from file: {e}")
        raise
@router.post("/supplier-invoices/files/{file_id}/match-vendor")
async def match_vendor_for_file(file_id: int):
    """
    Match vendor for an uploaded file with confidence scoring.

    Scoring tiers:
    - 100% = exact CVR match (or a previously detected vendor_id on the file)
    - 90%  = email domain match (not implemented yet — placeholder)
    - 70%  = fuzzy name match (vendor name <-> filename containment)

    Side effects:
    - If the file already carries detected_vendor_id, a minimal invoice is
      created immediately (create_invoice_from_file) and the file is marked
      'analyzed'; the function returns early.
    - A fresh 100% CVR match writes detected_vendor_id back to the file
      (but does NOT create an invoice).

    Raises:
        HTTPException(404): file not found
        HTTPException(500): any other failure
    """
    try:
        # Get file info with detected CVR and vendor
        file_info = execute_query(
            """SELECT file_id, detected_cvr, detected_vendor_id, filename
            FROM incoming_files WHERE file_id = %s""",
            (file_id,)
        )
        if not file_info:
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")
        file_data = file_info[0]
        detected_cvr = file_data.get('detected_cvr')
        detected_vendor_id = file_data.get('detected_vendor_id')
        vendor_matches = []
        # Get all active vendors
        vendors = execute_query("SELECT id, name, cvr_number, email, domain FROM vendors WHERE is_active = true ORDER BY name")
        if not vendors:
            vendors = []
        # If file already has detected_vendor_id, use it as 100% match
        if detected_vendor_id:
            matched_vendor = next((v for v in vendors if v['id'] == detected_vendor_id), None)
            if matched_vendor:
                vendor_matches.append({
                    "vendor_id": matched_vendor['id'],
                    "vendor_name": matched_vendor['name'],
                    "cvr_number": matched_vendor.get('cvr_number'),
                    "confidence": 100,
                    "match_reason": "Automatically detected from email",
                    "is_exact_match": True
                })
                # Auto-select this vendor and create invoice
                logger.info(f"✅ Auto-selected vendor {matched_vendor['name']} (ID: {detected_vendor_id}) from detected_vendor_id")
                # Create supplier invoice directly
                invoice_id = await create_invoice_from_file(file_id, detected_vendor_id)
                # Update file status
                execute_update(
                    "UPDATE incoming_files SET status = 'analyzed' WHERE file_id = %s",
                    (file_id,)
                )
                return {
                    "file_id": file_id,
                    "filename": file_data['filename'],
                    "detected_cvr": detected_cvr,
                    "matches": vendor_matches,
                    "auto_selected": vendor_matches[0],
                    "requires_manual_selection": False,
                    "invoice_id": invoice_id,
                    "message": f"Leverandør auto-valgt: {matched_vendor['name']}"
                }
        for vendor in vendors:
            # Skip if already matched by detected_vendor_id
            if detected_vendor_id and vendor['id'] == detected_vendor_id:
                continue
            confidence = 0
            match_reason = []
            # 100% = Exact CVR match
            if detected_cvr and vendor.get('cvr_number'):
                if detected_cvr.strip() == str(vendor['cvr_number']).strip():
                    confidence = 100
                    match_reason.append("Exact CVR match")
            # 90% = Email domain match (if we have extracted sender email from file metadata)
            # Note: This requires additional extraction logic - placeholder for now
            # 70% = Fuzzy name match (simple contains check for now)
            if confidence == 0:
                filename_lower = file_data['filename'].lower()
                vendor_name_lower = vendor['name'].lower()
                # Check if vendor name appears in filename (either direction)
                if vendor_name_lower in filename_lower or filename_lower in vendor_name_lower:
                    confidence = 70
                    match_reason.append("Name appears in filename")
            if confidence > 0:
                vendor_matches.append({
                    "vendor_id": vendor['id'],
                    "vendor_name": vendor['name'],
                    "cvr_number": vendor.get('cvr_number'),
                    "confidence": confidence,
                    "match_reason": ", ".join(match_reason),
                    "is_exact_match": confidence == 100
                })
        # Sort by confidence descending
        vendor_matches.sort(key=lambda x: x['confidence'], reverse=True)
        # If we have a 100% match, auto-select it
        auto_selected = None
        if vendor_matches and vendor_matches[0]['confidence'] == 100:
            auto_selected = vendor_matches[0]
            # Update file with detected vendor
            execute_update(
                "UPDATE incoming_files SET detected_vendor_id = %s WHERE file_id = %s",
                (auto_selected['vendor_id'], file_id)
            )
            logger.info(f"✅ Auto-matched vendor {auto_selected['vendor_name']} (100% CVR match) for file {file_id}")
        return {
            "file_id": file_id,
            "filename": file_data['filename'],
            "detected_cvr": detected_cvr,
            "matches": vendor_matches,
            "auto_selected": auto_selected,
            "requires_manual_selection": auto_selected is None
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Vendor matching failed for file {file_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/suggest-line-codes")
async def suggest_line_codes(vendor_id: int, description: str):
    """
    Suggest VAT code, contra account, and line purpose based on historical data.

    Scoring: each historical line with >60% description similarity contributes
    similarity × (1.0 + 1.0 / (days_old + 1)) to its (vat_code, contra_account,
    line_purpose) combination, so newer matches weigh more. A combination needs
    at least 3 matches before it qualifies; the top 5 are returned.

    Args:
        vendor_id: Vendor whose past invoice lines are used as history.
        description: New line description to fuzzy-match against the history.

    Returns:
        Dict with ranked 'suggestions', 'has_suggestions' and 'top_suggestion'.

    Raises:
        HTTPException: 500 on any unexpected failure.
    """
    try:
        from datetime import datetime, date
        from difflib import SequenceMatcher

        # Historical lines from this vendor that already have codes assigned.
        history_lines = execute_query(
            """SELECT sil.description, sil.vat_code, sil.contra_account, sil.line_purpose,
                      si.invoice_date, si.created_at
               FROM supplier_invoice_lines sil
               JOIN supplier_invoices si ON sil.supplier_invoice_id = si.id
               WHERE si.vendor_id = %s
               AND sil.vat_code IS NOT NULL
               ORDER BY si.created_at DESC
               LIMIT 500""",
            (vendor_id,)
        )
        if not history_lines:
            return {
                "vendor_id": vendor_id,
                "description": description,
                "suggestions": [],
                "has_suggestions": False,
                "note": "No historical data found for this vendor"
            }

        def _as_naive_datetime(value):
            """Coerce DB timestamps (str / date / aware datetime) to naive datetime.

            Fixes a latent TypeError: fromisoformat('...+00:00') yields an
            aware datetime, and both `datetime.now() - aware` and naive/aware
            comparisons (for last_used) would raise. A bare DATE column
            (invoice_date fallback) would also break the subtraction.
            """
            if value is None:
                return None
            if isinstance(value, str):
                value = datetime.fromisoformat(value.replace('Z', '+00:00'))
            if isinstance(value, datetime):
                return value.replace(tzinfo=None)
            if isinstance(value, date):
                # DATE columns become midnight datetimes.
                return datetime.combine(value, datetime.min.time())
            return None

        now = datetime.now()
        combination_scores = {}
        description_lower = description.lower().strip()

        for line in history_lines:
            hist_desc = (line.get('description') or '').lower().strip()
            # Fuzzy match the two descriptions; only keep >60% similarity.
            similarity = SequenceMatcher(None, description_lower, hist_desc).ratio()
            if similarity < 0.6:
                continue

            created_at = _as_naive_datetime(line.get('created_at') or line.get('invoice_date'))
            # Clamp to >= 0 so a clock-skewed future timestamp cannot make
            # days_old == -1 and divide by zero in the recency weight.
            days_old = max((now - created_at).days, 0) if created_at else 365
            recency_weight = 1.0 + (1.0 / (days_old + 1))

            combo_key = (
                line.get('vat_code'),
                line.get('contra_account'),
                line.get('line_purpose')
            )
            if combo_key not in combination_scores:
                combination_scores[combo_key] = {
                    'vat_code': line.get('vat_code'),
                    'contra_account': line.get('contra_account'),
                    'line_purpose': line.get('line_purpose'),
                    'match_count': 0,
                    'total_similarity': 0,
                    'weighted_score': 0,
                    'last_used': created_at,
                    'example_descriptions': []
                }
            combo = combination_scores[combo_key]
            combo['match_count'] += 1
            combo['total_similarity'] += similarity
            combo['weighted_score'] += similarity * recency_weight
            if len(combo['example_descriptions']) < 3:
                combo['example_descriptions'].append(line.get('description'))
            # Track the most recent usage of this combination.
            if created_at and (not combo['last_used'] or created_at > combo['last_used']):
                combo['last_used'] = created_at

        # Only combinations seen at least 3 times qualify as suggestions.
        valid_suggestions = [
            combo for combo in combination_scores.values()
            if combo['match_count'] >= 3
        ]
        valid_suggestions.sort(key=lambda x: x['weighted_score'], reverse=True)

        formatted_suggestions = [
            {
                'vat_code': suggestion['vat_code'],
                'contra_account': suggestion['contra_account'],
                'line_purpose': suggestion['line_purpose'],
                'match_count': suggestion['match_count'],
                'confidence_score': round(suggestion['weighted_score'], 2),
                'last_used': suggestion['last_used'].strftime('%Y-%m-%d') if suggestion['last_used'] else None,
                'example_descriptions': suggestion['example_descriptions']
            }
            for suggestion in valid_suggestions[:5]  # top 5 suggestions
        ]

        return {
            "vendor_id": vendor_id,
            "description": description,
            "suggestions": formatted_suggestions,
            "has_suggestions": len(formatted_suggestions) > 0,
            "top_suggestion": formatted_suggestions[0] if formatted_suggestions else None
        }
    except Exception as e:
        logger.error(f"❌ Line code suggestion failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/files/batch-analyze")
async def batch_analyze_files():
    """
    Batch analyze all pending files using cascade extraction:
    1. invoice2data (YAML templates) - fastest
    2. template_service (regex patterns) - if invoice2data fails
    3. ollama AI - as last backup

    Files with a 100% vendor (CVR) match are marked 'analyzed'; files with a
    weaker match are flagged 'requires_vendor_selection' and left pending for
    manual vendor selection. Per-file failures are recorded in the result and
    do not abort the batch.

    Returns:
        Dict with counters (analyzed / invoices_created / failed /
        requires_vendor_selection) and a per-file 'details' list.

    Raises:
        HTTPException: 500 if the batch itself fails to run.
    """
    try:
        # Files never analyzed plus files whose previous extraction failed.
        pending_files = execute_query(
            """SELECT file_id, filename, file_path, detected_vendor_id, detected_cvr
               FROM incoming_files
               WHERE status IN ('pending', 'extraction_failed')
               ORDER BY uploaded_at DESC"""
        )
        if not pending_files:
            return {
                "message": "No pending files to analyze",
                "analyzed": 0,
                "invoices_created": 0,
                "failed": 0,
                "requires_vendor_selection": 0
            }

        results = {
            "analyzed": 0,
            "invoices_created": 0,
            "failed": 0,
            "requires_vendor_selection": 0,
            "details": []
        }

        for file_data in pending_files:
            file_id = file_data['file_id']
            filename = file_data['filename']
            try:
                # Vendor matching first: extraction only runs on a 100% match.
                vendor_match_result = await match_vendor_for_file(file_id)

                if vendor_match_result.get('requires_manual_selection'):
                    # No exact CVR match - park the file for manual selection.
                    execute_update(
                        "UPDATE incoming_files SET status = 'requires_vendor_selection' WHERE file_id = %s",
                        (file_id,)
                    )
                    results['requires_vendor_selection'] += 1
                    results['details'].append({
                        "file_id": file_id,
                        "filename": filename,
                        "status": "requires_vendor_selection",
                        "vendor_matches": len(vendor_match_result.get('matches', []))
                    })
                    continue

                # 100% vendor match - proceed with the extraction cascade.
                vendor_id = vendor_match_result['auto_selected']['vendor_id']
                # TODO: run the invoice2data -> template_service -> ollama
                # cascade here via a shared helper; for now just mark analyzed.
                execute_update(
                    "UPDATE incoming_files SET status = 'analyzed', detected_vendor_id = %s WHERE file_id = %s",
                    (vendor_id, file_id)
                )
                results['analyzed'] += 1
                results['details'].append({
                    "file_id": file_id,
                    "filename": filename,
                    "status": "analyzed",
                    "vendor_id": vendor_id,
                    "vendor_name": vendor_match_result['auto_selected']['vendor_name']
                })
                # Fix: log the actual filename (was the literal "(unknown)").
                logger.info(f"✅ Analyzed file {file_id}: {filename}")
            except Exception as e:
                logger.error(f"❌ Batch analysis failed for file {file_id}: {e}")
                execute_update(
                    "UPDATE incoming_files SET status = 'extraction_failed' WHERE file_id = %s",
                    (file_id,)
                )
                results['failed'] += 1
                results['details'].append({
                    "file_id": file_id,
                    "filename": filename,
                    "status": "extraction_failed",
                    "error": str(e)
                })

        logger.info(f"✅ Batch analysis complete: {results['analyzed']} analyzed, {results['invoices_created']} invoices created, {results['failed']} failed")
        return results
    except Exception as e:
        logger.error(f"❌ Batch analysis failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/files/{file_id}/retry")
async def retry_extraction(file_id: int):
    """
    Retry extraction for a file whose previous attempt failed.

    Resets the file's status to 'pending', then immediately re-runs the full
    extraction cascade (invoice2data, then template_service, then ollama AI)
    via reprocess_uploaded_file.

    Args:
        file_id: Primary key of the row in incoming_files.

    Returns:
        Whatever reprocess_uploaded_file returns for this file.

    Raises:
        HTTPException: 404 when the file does not exist, 500 on any other
        unexpected failure.
    """
    try:
        rows = execute_query(
            "SELECT file_id, filename, file_path, status FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )
        # Guard clause: unknown file id -> 404 before touching anything.
        if not rows:
            raise HTTPException(status_code=404, detail=f"File {file_id} not found")
        record = rows[0]

        # Put the file back into the queue state expected by the cascade.
        execute_update(
            "UPDATE incoming_files SET status = 'pending' WHERE file_id = %s",
            (file_id,)
        )
        logger.info(f"🔄 Retrying extraction for file {file_id}: {record['filename']}")

        # Kick off the cascade right away and hand its result straight back.
        return await reprocess_uploaded_file(file_id)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Retry extraction failed for file {file_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))