- Added `check_invoice_number_exists` method in `EconomicService` to verify invoice numbers in e-conomic journals.
- Introduced `quick_analysis_on_upload` method in `OllamaService` for extracting critical fields from uploaded PDFs, including CVR, document type, and document number.
- Created a migration script adding fields for the detected CVR, vendor ID, document type, and document number to the `incoming_files` table.
- Developed comprehensive tests for the quick-analysis functionality, validating CVR detection, document-type identification, and invoice-number extraction.
2671 lines
109 KiB
Python
2671 lines
109 KiB
Python
"""
|
||
Supplier Invoices Router - Leverandørfakturaer (Kassekladde)
|
||
Backend API for managing supplier invoices that integrate with e-conomic
|
||
"""
|
||
|
||
from fastapi import APIRouter, HTTPException, UploadFile, File
|
||
from typing import List, Dict, Optional
|
||
from datetime import datetime, date, timedelta
|
||
from decimal import Decimal
|
||
from pathlib import Path
|
||
from app.core.database import execute_query, execute_insert, execute_update
|
||
from app.core.config import settings
|
||
from app.services.economic_service import get_economic_service
|
||
from app.services.ollama_service import ollama_service
|
||
from app.services.template_service import template_service
|
||
from app.services.invoice2data_service import get_invoice2data_service
|
||
import logging
|
||
import os
|
||
import re
|
||
|
||
logger = logging.getLogger(__name__)
|
||
router = APIRouter()
|
||
|
||
|
||
def _smart_extract_lines(text: str) -> List[Dict]:
|
||
"""
|
||
Universal line extraction using pdfplumber layout mode.
|
||
Tries pdfplumber columnar format first, then falls back to vendor-specific patterns.
|
||
"""
|
||
lines_arr = text.split('\n')
|
||
items = []
|
||
i = 0
|
||
|
||
while i < len(lines_arr):
|
||
line = lines_arr[i].strip()
|
||
|
||
# Skip empty or header lines
|
||
if not line or re.search(r'(Position|Varenr|Beskrivelse|Antal|Pris|Total|Model)', line, re.IGNORECASE):
|
||
i += 1
|
||
continue
|
||
|
||
# Pattern 1: pdfplumber layout mode - " 1 95006 Betalingsmetode... 1 41,20 41,20"
|
||
# Whitespace-separated columns: position item_number description quantity unit_price total_price
|
||
# Most specific pattern - try first!
|
||
layout_match = re.match(r'^\s*(\d{1,2})\s+(\d{4,10})\s+(.+?)\s(\d{1,2})\s+([\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$', line)
|
||
if layout_match:
|
||
items.append({
|
||
'line_number': len(items) + 1,
|
||
'position': layout_match.group(1),
|
||
'item_number': layout_match.group(2),
|
||
'description': layout_match.group(3).strip(),
|
||
'quantity': layout_match.group(4),
|
||
'unit_price': layout_match.group(5).replace(' ', '').replace(',', '.'),
|
||
'total_price': layout_match.group(6).replace(' ', '').replace(',', '.'),
|
||
'raw_text': line
|
||
})
|
||
logger.info(f"✅ pdfplumber layout: {layout_match.group(2)} - {layout_match.group(3)[:30]}...")
|
||
i += 1
|
||
continue
|
||
|
||
# Pattern 2: ALSO format - "100 48023976 REFURB LENOVO..." (multi-line)
|
||
item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line)
|
||
if item_match:
|
||
position = item_match.group(1)
|
||
item_number = item_match.group(2)
|
||
description = item_match.group(3).strip()
|
||
|
||
# Find næste linje med antal+priser
|
||
quantity = None
|
||
unit_price = None
|
||
total_price = None
|
||
|
||
for j in range(i+1, min(i+10, len(lines_arr))):
|
||
price_line = lines_arr[j].strip()
|
||
price_match = re.match(r'^(\d+)\s*(?:ST|stk|pc|pcs)\s+([\d.,]+)\s+([\d.,]+)', price_line, re.IGNORECASE)
|
||
if price_match:
|
||
quantity = price_match.group(1)
|
||
unit_price = price_match.group(2).replace(',', '.')
|
||
total_price = price_match.group(3).replace(',', '.')
|
||
break
|
||
|
||
if quantity and unit_price:
|
||
items.append({
|
||
'line_number': len(items) + 1,
|
||
'position': position,
|
||
'item_number': item_number,
|
||
'description': description,
|
||
'quantity': quantity,
|
||
'unit_price': unit_price,
|
||
'total_price': total_price,
|
||
'raw_text': f"{line} ... {quantity}ST {unit_price} {total_price}"
|
||
})
|
||
logger.info(f"✅ ALSO: {item_number} - {description[:30]}...")
|
||
i += 1
|
||
continue
|
||
|
||
# Pattern 3: DCS single-line - "195006Betalingsmetode... 141,2041,20" (legacy PyPDF2 format)
|
||
# Position: 1 digit, Item: 4-10 digits, Description starts with letter
|
||
# Prices: Danish format 1-3 digits, comma, 2 decimals (e.g., 41,20 or 619,00)
|
||
# Quantity: 1-2 digits (non-greedy) before first price
|
||
dcs_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+?)(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})$', line)
|
||
if dcs_match:
|
||
items.append({
|
||
'line_number': len(items) + 1,
|
||
'position': dcs_match.group(1),
|
||
'item_number': dcs_match.group(2),
|
||
'description': dcs_match.group(3).strip(),
|
||
'quantity': dcs_match.group(4),
|
||
'unit_price': dcs_match.group(5).replace(',', '.'),
|
||
'total_price': dcs_match.group(6).replace(',', '.'),
|
||
'raw_text': line
|
||
})
|
||
logger.info(f"✅ DCS single-line: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...")
|
||
i += 1
|
||
continue
|
||
|
||
# Pattern 4: DCS multi-line - "2994922511Ubiquiti..." then search for "...USW-FLEX 1619,00619,00" (legacy)
|
||
dcs_multi_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+)$', line)
|
||
if dcs_multi_match and not re.search(r'KN8|EAN|Model|Position|Varenr|Tekst', line):
|
||
position = dcs_multi_match.group(1)
|
||
item_number = dcs_multi_match.group(2)
|
||
description = dcs_multi_match.group(3).strip()
|
||
|
||
# Search next 5 lines for quantity/prices (Danish format 1-3 digits before comma)
|
||
for j in range(1, 6):
|
||
if i + j >= len(lines_arr):
|
||
break
|
||
price_line = lines_arr[i + j].strip()
|
||
# Match: "S/N: ...USW-FLEX 1619,00619,00" - qty (1-2 digits, non-greedy) + TWO prices
|
||
price_match = re.search(r'(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})\s*$', price_line)
|
||
if price_match:
|
||
quantity = price_match.group(1)
|
||
unit_price = price_match.group(2).replace(',', '.')
|
||
total_price = price_match.group(3).replace(',', '.')
|
||
items.append({
|
||
'line_number': len(items) + 1,
|
||
'position': position,
|
||
'item_number': item_number,
|
||
'description': description,
|
||
'quantity': quantity,
|
||
'unit_price': unit_price,
|
||
'total_price': total_price,
|
||
'raw_text': f"{line} ... {price_line}"
|
||
})
|
||
logger.info(f"✅ DCS multi-line: {item_number} - {description[:30]}...")
|
||
break
|
||
i += 1
|
||
continue
|
||
|
||
i += 1
|
||
|
||
if items:
|
||
logger.info(f"📦 Multi-line extraction found {len(items)} items")
|
||
else:
|
||
logger.warning("⚠️ Multi-line extraction found no items")
|
||
return items
|
||
|
||
|
||
# ========== CRUD OPERATIONS ==========
|
||
|
||
@router.get("/supplier-invoices")
async def list_supplier_invoices(
    status: Optional[str] = None,
    vendor_id: Optional[int] = None,
    overdue_only: bool = False
):
    """
    List all supplier invoices with filtering options.

    Args:
        status: Filter by status (pending, approved, sent_to_economic, paid, overdue, cancelled)
        vendor_id: Filter by vendor
        overdue_only: Only show overdue unpaid invoices
    """
    try:
        sql = """
            SELECT
                si.*,
                v.name as vendor_full_name,
                v.economic_supplier_number as vendor_economic_id,
                CASE
                    WHEN si.paid_date IS NOT NULL THEN 'paid'
                    WHEN si.due_date < CURRENT_DATE AND si.paid_date IS NULL THEN 'overdue'
                    ELSE si.status
                END as computed_status
            FROM supplier_invoices si
            LEFT JOIN vendors v ON si.vendor_id = v.id
            WHERE 1=1
        """
        params = []

        # Optional equality filters, appended data-driven so the SQL fragments
        # and their bind parameters stay paired.
        for clause, value in ((" AND si.status = %s", status),
                              (" AND si.vendor_id = %s", vendor_id)):
            if value:
                sql += clause
                params.append(value)

        if overdue_only:
            sql += " AND si.due_date < CURRENT_DATE AND si.paid_date IS NULL"

        sql += " ORDER BY si.due_date ASC, si.invoice_date DESC"

        invoices = execute_query(sql, tuple(params) if params else ())

        # Attach the line items to every invoice in the result.
        for invoice in invoices:
            invoice['lines'] = execute_query(
                "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
                (invoice['id'],)
            )

        return invoices

    except Exception as e:
        logger.error(f"❌ Failed to list supplier invoices: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/pending-supplier-invoice-files")
async def get_pending_files():
    """Return all files awaiting processing, including AI-extracted ones.

    One row per file (``DISTINCT ON (f.file_id)`` with newest upload first),
    enriched with quick-analysis columns, the vendor match from the latest
    extraction, and a ``has_invoice2data_template`` flag computed per file.
    Files that already produced a supplier invoice are excluded.
    """
    try:
        # Fetch both pending files AND ai_extracted files in one query.
        files = execute_query(
            """SELECT DISTINCT ON (f.file_id)
                f.file_id,
                f.filename,
                f.status,
                f.uploaded_at,
                f.error_message,
                f.template_id,
                f.file_path,
                -- Quick analysis results (available immediately on upload)
                f.detected_cvr,
                f.detected_vendor_id,
                f.detected_document_type,
                f.detected_document_number,
                f.is_own_invoice,
                v_detected.name as detected_vendor_name,
                v_detected.cvr_number as detected_vendor_cvr,
                -- Get vendor info from latest extraction
                ext.vendor_name,
                ext.vendor_cvr,
                ext.vendor_matched_id,
                v.name as matched_vendor_name,
                v.cvr_number as matched_vendor_cvr_number,
                -- Check if already has invoice via latest extraction only
                si.id as existing_invoice_id,
                si.invoice_number as existing_invoice_number
            FROM incoming_files f
            LEFT JOIN vendors v_detected ON v_detected.id = f.detected_vendor_id
            LEFT JOIN LATERAL (
                SELECT extraction_id, file_id, vendor_name, vendor_cvr, vendor_matched_id
                FROM extractions
                WHERE file_id = f.file_id
                ORDER BY created_at DESC
                LIMIT 1
            ) ext ON true
            LEFT JOIN vendors v ON v.id = ext.vendor_matched_id
            LEFT JOIN supplier_invoices si ON si.extraction_id = ext.extraction_id
            WHERE f.status IN ('pending', 'processing', 'failed', 'ai_extracted', 'processed', 'duplicate')
            AND si.id IS NULL -- Only show files without invoice yet
            ORDER BY f.file_id, f.uploaded_at DESC"""
        )

        # Convert DB rows to regular dicts so we can add new keys below.
        files = [dict(file) for file in files] if files else []

        # Best-effort: flag files whose vendor CVR matches an invoice2data
        # YAML template keyword. Failures here must not break the listing.
        try:
            from app.services.invoice2data_service import get_invoice2data_service
            invoice2data = get_invoice2data_service()
            logger.info(f"📋 Checking invoice2data templates: {len(invoice2data.templates)} loaded")

            for file in files:
                # CVR preference order: matched vendor, quick-analysis
                # detection, then the raw extracted CVR.
                vendor_cvr = file.get('matched_vendor_cvr_number') or file.get('detected_vendor_cvr') or file.get('vendor_cvr')
                file['has_invoice2data_template'] = False

                logger.debug(f"  File {file['file_id']}: CVR={vendor_cvr}")

                if vendor_cvr:
                    # Check all templates for this CVR in their keywords list.
                    for template_name, template_data in invoice2data.templates.items():
                        keywords = template_data.get('keywords', [])
                        logger.debug(f"  Template {template_name}: keywords={keywords}")
                        if str(vendor_cvr) in [str(k) for k in keywords]:
                            file['has_invoice2data_template'] = True
                            file['invoice2data_template_name'] = template_name
                            logger.info(f"  ✅ File {file['file_id']} matched template: {template_name}")
                            break
        except Exception as e:
            logger.error(f"❌ Failed to check invoice2data templates: {e}", exc_info=True)
            # Continue without invoice2data info

        return {"files": files if files else [], "count": len(files) if files else 0}
    except Exception as e:
        logger.error(f"❌ Failed to get pending files: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/files/{file_id}/pdf-text")
async def get_file_pdf_text(file_id: int):
    """Return the full extracted PDF text for an uploaded file (template builder)."""
    try:
        record = execute_query(
            "SELECT file_path, filename FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True,
        )
        if not record:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # The DB row may reference a file that has since disappeared from disk.
        pdf_path = Path(record['file_path'])
        if not pdf_path.exists():
            raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {pdf_path}")

        extracted_text = await ollama_service._extract_text_from_file(pdf_path)

        return {
            "file_id": file_id,
            "filename": record['filename'],
            "pdf_text": extracted_text,
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get PDF text: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
def _format_extraction_line(line: Dict) -> Dict:
    """Normalize one extracted invoice line to the frontend response shape.

    Numeric fields are converted to float; falsy values (None, '', 0) map to
    None. NOTE(review): a genuine 0 is therefore reported as None — kept for
    backwards compatibility with the previous inline formatting code.
    """
    def as_float(value):
        return float(value) if value else None

    return {
        "description": line.get('description'),
        "quantity": as_float(line.get('quantity')),
        "unit_price": as_float(line.get('unit_price')),
        "vat_rate": as_float(line.get('vat_rate')),
        "line_total": as_float(line.get('line_total')),
        "vat_note": line.get('vat_note')
    }


@router.get("/supplier-invoices/files/{file_id}/extracted-data")
async def get_file_extracted_data(file_id: int):
    """Return the AI-extracted data for an uploaded file.

    Combines the incoming_files row, the most recent extraction (with its
    LLM JSON parsed when present), formatted line items and a PDF-text
    preview (first 5000 characters).

    Raises:
        HTTPException 404: file not found.
        HTTPException 500: unexpected errors.
    """
    try:
        # File must exist
        file_info = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # Latest extraction for this file, if any
        extraction = execute_query(
            "SELECT * FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,),
            fetchone=True
        )

        # Parse llm_response_json if it exists (from AI or template extraction).
        # It may be stored as a JSON string or already as a dict/jsonb.
        llm_json_data = None
        if extraction and extraction.get('llm_response_json'):
            import json
            try:
                raw_json = extraction['llm_response_json']
                llm_json_data = json.loads(raw_json) if isinstance(raw_json, str) else raw_json
                logger.info(f"📊 Parsed llm_response_json: invoice_number={llm_json_data.get('invoice_number')}")
            except Exception as e:
                logger.warning(f"⚠️ Failed to parse llm_response_json: {e}")

        # Structured line rows stored with the extraction
        extraction_lines = []
        if extraction:
            extraction_lines = execute_query(
                """SELECT * FROM extraction_lines
                WHERE extraction_id = %s
                ORDER BY line_number""",
                (extraction['extraction_id'],)
            )

        # Best-effort PDF text for the preview
        pdf_text = None
        if file_info['file_path']:
            file_path = Path(file_info['file_path'])
            if file_path.exists():
                pdf_text = await ollama_service._extract_text_from_file(file_path)

        # Prefer DB line rows; fall back to lines embedded in the LLM JSON.
        if extraction_lines:
            formatted_lines = [_format_extraction_line(line) for line in extraction_lines]
        elif llm_json_data and llm_json_data.get('lines'):
            formatted_lines = [_format_extraction_line(line) for line in llm_json_data['lines']]
        else:
            formatted_lines = []

        # Header-level data: LLM JSON first (works for both AI and template
        # extraction), extraction table columns as fallback.
        llm_data = None
        if llm_json_data:
            llm_data = {
                "invoice_number": llm_json_data.get('invoice_number'),
                "invoice_date": llm_json_data.get('invoice_date'),
                "due_date": llm_json_data.get('due_date'),
                "total_amount": float(llm_json_data.get('total_amount')) if llm_json_data.get('total_amount') else None,
                "currency": llm_json_data.get('currency') or 'DKK',
                "document_type": llm_json_data.get('document_type'),
                "lines": formatted_lines
            }
        elif extraction:
            llm_data = {
                "invoice_number": extraction.get('document_id'),
                "invoice_date": extraction.get('document_date').isoformat() if extraction.get('document_date') else None,
                "due_date": extraction.get('due_date').isoformat() if extraction.get('due_date') else None,
                "total_amount": float(extraction.get('total_amount')) if extraction.get('total_amount') else None,
                "currency": extraction.get('currency') or 'DKK',
                "document_type": extraction.get('document_type'),
                "lines": formatted_lines
            }

        # Vendor match comes from the extraction row
        vendor_matched_id = extraction.get('vendor_matched_id') if extraction else None

        return {
            "file_id": file_id,
            "filename": file_info['filename'],
            "status": file_info['status'],
            "uploaded_at": file_info['uploaded_at'],
            "vendor_matched_id": vendor_matched_id,
            "llm_data": llm_data,
            "extraction": extraction,
            "extraction_lines": extraction_lines if extraction_lines else [],
            "pdf_text_preview": pdf_text[:5000] if pdf_text else None
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get extracted data: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/files/{file_id}/download")
async def download_pending_file(file_id: int):
    """Serve the stored PDF inline so the browser renders it instead of downloading."""
    from fastapi.responses import FileResponse

    try:
        record = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True,
        )
        if not record:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        pdf_path = Path(record['file_path'])
        if not pdf_path.exists():
            raise HTTPException(status_code=404, detail="Fil findes ikke på disk")

        # Inline disposition -> browser displays the PDF instead of downloading it.
        return FileResponse(
            path=str(pdf_path),
            media_type='application/pdf',
            headers={"Content-Disposition": f"inline; filename={record['filename']}"},
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to view file: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/files/{file_id}/pdf")
async def get_file_pdf(file_id: int):
    """Get PDF file for viewing (alias for download endpoint).

    Delegates to ``download_pending_file``, which returns the stored PDF with
    an inline Content-Disposition header.
    """
    return await download_pending_file(file_id)
|
||
|
||
|
||
@router.post("/supplier-invoices/files/{file_id}/link-vendor")
async def link_vendor_to_extraction(file_id: int, data: dict):
    """Link an existing vendor to the file's latest extraction.

    NOTE(review): this route path and function name are declared a second
    time further down in this module — the duplicates should be consolidated.
    """
    try:
        vendor_id = data.get('vendor_id')
        if not vendor_id:
            raise HTTPException(status_code=400, detail="vendor_id is required")

        # The vendor must exist before we link it.
        vendor_row = execute_query(
            "SELECT id, name FROM vendors WHERE id = %s",
            (vendor_id,),
            fetchone=True,
        )
        if not vendor_row:
            raise HTTPException(status_code=404, detail="Leverandør ikke fundet")

        # Only the most recent extraction for the file is updated.
        latest = execute_query(
            "SELECT extraction_id FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,),
            fetchone=True,
        )
        if not latest:
            raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")

        execute_update(
            """UPDATE extractions
            SET vendor_matched_id = %s
            WHERE extraction_id = %s""",
            (vendor_id, latest['extraction_id'])
        )

        logger.info(f"✅ Linked vendor {vendor_row['name']} (ID: {vendor_id}) to extraction for file {file_id}")

        return {
            "status": "success",
            "vendor_id": vendor_id,
            "vendor_name": vendor_row['name']
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to link vendor: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.delete("/supplier-invoices/files/{file_id}")
async def delete_pending_file_endpoint(file_id: int):
    """Delete an uploaded file and its related data (DB row plus file on disk)."""
    try:
        record = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True,
        )
        if not record:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # Refuse deletion once an invoice has been created from this file.
        has_invoice = execute_query(
            """SELECT si.id FROM supplier_invoices si
            JOIN extractions e ON si.extraction_id = e.extraction_id
            WHERE e.file_id = %s""",
            (file_id,),
            fetchone=True,
        )
        if has_invoice:
            raise HTTPException(
                status_code=400,
                detail="Kan ikke slette fil - der er allerede oprettet en faktura fra denne fil"
            )

        # Delete the DB row first (cascade removes the extractions)...
        execute_update(
            "DELETE FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )

        # ...then remove the physical file, if it is still on disk.
        stored_path = record['file_path']
        if stored_path:
            path_obj = Path(stored_path)
            if path_obj.exists():
                os.remove(path_obj)
                logger.info(f"🗑️ Deleted file: {path_obj}")

        return {"message": "Fil slettet", "file_id": file_id}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete file: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.patch("/supplier-invoices/files/{file_id}")
async def update_file_status(file_id: int, data: dict):
    """Update the processing status of an uploaded file."""
    try:
        allowed_statuses = ['pending', 'processing', 'processed', 'ai_extracted', 'completed', 'failed']
        new_status = data.get('status')

        # Reject anything outside the known status set (covers None/'' too).
        if new_status not in allowed_statuses:
            raise HTTPException(status_code=400, detail=f"Ugyldig status. Tilladte: {', '.join(allowed_statuses)}")

        execute_update(
            "UPDATE incoming_files SET status = %s, processed_at = CURRENT_TIMESTAMP WHERE file_id = %s",
            (new_status, file_id),
        )

        logger.info(f"✅ Updated file {file_id} status to {new_status}")

        return {"status": "success", "file_id": file_id, "new_status": new_status}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update file status: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/files/{file_id}/link-vendor")
async def link_vendor_to_extraction(file_id: int, data: dict):
    """Link an existing vendor to the latest extraction for a file.

    NOTE(review): this is a duplicate definition — the same route path and
    function name are declared earlier in this module. This later ``def``
    rebinds the module-level name, and FastAPI has registered both handlers
    for the identical path; the two should be consolidated into one.

    Expects ``data`` to contain ``vendor_id``; returns the linked vendor and
    the extraction id on success.
    """
    try:
        # Required request-body field
        vendor_id = data.get('vendor_id')

        if not vendor_id:
            raise HTTPException(status_code=400, detail="vendor_id er påkrævet")

        # Verify vendor exists
        vendor = execute_query(
            "SELECT id, name FROM vendors WHERE id = %s",
            (vendor_id,),
            fetchone=True
        )

        if not vendor:
            raise HTTPException(status_code=404, detail=f"Leverandør {vendor_id} ikke fundet")

        # Get latest extraction for this file
        extraction = execute_query(
            "SELECT extraction_id FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,),
            fetchone=True
        )

        if not extraction:
            raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")

        # Update extraction with vendor match
        execute_update(
            "UPDATE extractions SET vendor_matched_id = %s WHERE extraction_id = %s",
            (vendor_id, extraction['extraction_id'])
        )

        logger.info(f"✅ Linked vendor {vendor['name']} (ID: {vendor_id}) to extraction {extraction['extraction_id']}")

        return {
            "status": "success",
            "vendor_id": vendor_id,
            "vendor_name": vendor['name'],
            "extraction_id": extraction['extraction_id']
        }

    except HTTPException:
        # Re-raise the 400/404 responses raised above unchanged
        raise
    except Exception as e:
        logger.error(f"❌ Failed to link vendor: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/from-extraction/{file_id}")
async def create_invoice_from_extraction(file_id: int):
    """Create a supplier invoice (plus its lines) from the latest AI extraction.

    Requires a matched vendor on the extraction; refuses to create a second
    invoice from the same extraction. Marks the file 'completed' on success.

    Raises:
        HTTPException 404: no extraction exists for the file.
        HTTPException 400: vendor not linked, or invoice already created.
        HTTPException 500: unexpected errors.
    """
    try:
        # Latest extraction for this file, with vendor name for the response
        extraction = execute_query(
            """SELECT e.*, v.name as vendor_name
            FROM extractions e
            LEFT JOIN vendors v ON v.id = e.vendor_matched_id
            WHERE e.file_id = %s
            ORDER BY e.created_at DESC
            LIMIT 1""",
            (file_id,),
            fetchone=True
        )

        if not extraction:
            raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")

        # Vendor must be linked before an invoice can be created
        if not extraction['vendor_matched_id']:
            raise HTTPException(
                status_code=400,
                detail="Leverandør skal linkes før faktura kan oprettes. Brug 'Link eller Opret Leverandør' først."
            )

        # Guard against double-creation from the same extraction
        existing = execute_query(
            "SELECT id FROM supplier_invoices WHERE extraction_id = %s",
            (extraction['extraction_id'],),
            fetchone=True
        )

        if existing:
            raise HTTPException(status_code=400, detail="Faktura er allerede oprettet fra denne extraction")

        # Extraction line rows become invoice lines below
        lines = execute_query(
            """SELECT * FROM extraction_lines
            WHERE extraction_id = %s
            ORDER BY line_number""",
            (extraction['extraction_id'],)
        )

        # Parse LLM response JSON if it is stored as a string
        import json
        llm_data = extraction.get('llm_response_json')
        if isinstance(llm_data, str):
            try:
                llm_data = json.loads(llm_data)
            except ValueError:  # FIX: was a bare `except:` swallowing everything
                llm_data = {}
        elif not llm_data:
            llm_data = {}

        # Invoice number from LLM data, or a deterministic fallback
        invoice_number = llm_data.get('invoice_number')
        if not invoice_number:
            invoice_number = f"INV-{file_id}"

        # Detect document type (invoice or credit_note)
        document_type = llm_data.get('document_type', 'invoice')
        invoice_type = 'credit_note' if document_type == 'credit_note' else 'invoice'

        # Dates - use today / +30 days as fallbacks when missing
        from datetime import datetime, timedelta
        invoice_date = extraction.get('document_date')
        if not invoice_date:
            invoice_date = datetime.now().strftime('%Y-%m-%d')
            logger.warning(f"⚠️ No invoice_date found, using today: {invoice_date}")

        due_date = extraction.get('due_date')
        if not due_date:
            # Default to 30 days from invoice date.
            # FIX: document_date can come back from the DB as a date object,
            # not a string — strptime() on it raised TypeError before.
            if isinstance(invoice_date, str):
                inv_date_obj = datetime.strptime(invoice_date, '%Y-%m-%d')
            else:
                inv_date_obj = invoice_date
            due_date = (inv_date_obj + timedelta(days=30)).strftime('%Y-%m-%d')
            logger.warning(f"⚠️ No due_date found, using invoice_date + 30 days: {due_date}")

        # Create supplier invoice; credit notes start as 'credited'
        invoice_id = execute_insert(
            """INSERT INTO supplier_invoices (
                vendor_id, invoice_number, invoice_date, due_date,
                total_amount, currency, status, extraction_id, notes, invoice_type
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            RETURNING id""",
            (
                extraction['vendor_matched_id'],
                invoice_number,
                invoice_date,
                due_date,
                extraction['total_amount'],
                extraction['currency'],
                'credited' if invoice_type == 'credit_note' else 'unpaid',
                extraction['extraction_id'],
                f"Oprettet fra AI extraction (file_id: {file_id})",
                invoice_type
            )
        )

        # Create invoice lines from the extraction rows
        if lines:
            for line in lines:
                execute_update(
                    """INSERT INTO supplier_invoice_lines (
                        supplier_invoice_id, description, quantity, unit_price,
                        line_total, vat_rate, vat_amount
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s)""",
                    (
                        invoice_id,
                        line['description'],
                        line.get('quantity') or 1,
                        line.get('unit_price') or 0,
                        line.get('line_total') or 0,
                        line.get('vat_rate') or 25.00,  # Default 25% Danish VAT if NULL
                        line.get('vat_amount')
                    )
                )

        # Mark the source file as fully processed
        execute_update(
            "UPDATE incoming_files SET status = 'completed' WHERE file_id = %s",
            (file_id,)
        )

        logger.info(f"✅ Created supplier invoice {invoice_id} from extraction {extraction['extraction_id']}")

        return {
            "status": "success",
            "invoice_id": invoice_id,
            "invoice_number": invoice_number,
            "vendor_name": extraction['vendor_name'],
            "total_amount": extraction['total_amount'],
            "currency": extraction['currency']
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to create invoice from extraction: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# Keep existing endpoints below...
# NOTE: removed an orphaned `except HTTPException:` / `except Exception:` pair
# that was left behind here by an earlier refactor — a module-level `except`
# with no matching `try` is a SyntaxError and prevented this module from
# importing at all.
|
||
|
||
|
||
# ========== TEMPLATE MANAGEMENT (must be before {invoice_id} route) ==========
|
||
|
||
@router.get("/supplier-invoices/templates")
async def list_templates():
    """Return all templates (both database rows and invoice2data YAML files).

    DB templates are returned as-is; invoice2data YAML templates are mapped
    to the same shape with ``template_id = -1`` and, when an 8-digit CVR is
    present in the template keywords, enriched with the matching vendor.
    """
    try:
        # Get database templates
        query = """
            SELECT t.*, v.name as vendor_name
            FROM supplier_invoice_templates t
            LEFT JOIN vendors v ON t.vendor_id = v.id
            WHERE t.is_active = true
            ORDER BY t.created_at DESC
        """
        db_templates = execute_query(query) or []

        # Get invoice2data templates
        invoice2data_service = get_invoice2data_service()
        invoice2data_templates = []

        for template_name, template_data in invoice2data_service.templates.items():
            # Extract vendor CVR from keywords (first 8-digit numeric string)
            vendor_cvr = None
            keywords = template_data.get('keywords', [])
            for keyword in keywords:
                if isinstance(keyword, str) and keyword.isdigit() and len(keyword) == 8:
                    vendor_cvr = keyword
                    break

            # Get vendor info from database if a CVR was found.
            # NOTE(review): one DB query per template (N+1) — acceptable for a
            # handful of templates; batch the lookups if the count grows.
            vendor_name = template_data.get('issuer', 'Ukendt')
            vendor_id = None
            if vendor_cvr:
                vendor = execute_query(
                    "SELECT id, name FROM vendors WHERE cvr_number = %s",
                    (vendor_cvr,),
                    fetchone=True
                )
                if vendor:
                    vendor_id = vendor['id']
                    vendor_name = vendor['name']

            invoice2data_templates.append({
                'template_id': -1,  # Negative ID to distinguish from DB templates
                'template_name': f"Invoice2Data: {template_name}",
                'template_type': 'invoice2data',
                'yaml_filename': template_name,
                'vendor_id': vendor_id,
                'vendor_name': vendor_name,
                'vendor_cvr': vendor_cvr,
                'default_product_category': template_data.get('default_product_category', 'varesalg'),
                'default_product_group_number': template_data.get('default_product_group_number', 1),
                'usage_count': 0,  # Could track this separately
                'is_active': True,
                'detection_patterns': keywords,
                'field_mappings': template_data.get('fields', {}),
                'created_at': None
            })

        # Combine both types into one flat list
        all_templates = db_templates + invoice2data_templates

        return all_templates
    except Exception as e:
        logger.error(f"❌ Failed to list templates: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/templates/{template_id}")
async def get_template(template_id: int):
    """Fetch a single active template joined with its vendor's name and CVR."""
    try:
        sql = """
            SELECT t.*, v.name as vendor_name, v.cvr_number as vendor_cvr
            FROM supplier_invoice_templates t
            LEFT JOIN vendors v ON t.vendor_id = v.id
            WHERE t.template_id = %s AND t.is_active = true
        """
        row = execute_query(sql, (template_id,), fetchone=True)

        # Inactive templates are filtered by the query, so both "missing"
        # and "deactivated" surface as a 404 here.
        if not row:
            raise HTTPException(status_code=404, detail="Template not found")
        return row

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get template {template_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/search-vendor")
async def search_vendor_by_info(request: Dict):
    """
    Look up a vendor by CVR (exact) or name (fuzzy), optionally creating it.

    Request body:
    {
        "vendor_name": "DCS ApS",
        "vendor_cvr": "12345678",
        "vendor_address": "Vej 1, 2000 By",
        "create_if_missing": true
    }

    Lookup order:
    1. Exact CVR match (most reliable) -> single hit.
    2. Case-insensitive substring match on name -> up to 5 candidates.
    3. If nothing matched and create_if_missing is set, insert a new vendor,
       unless the CVR belongs to our own company (buyer/supplier mix-up).
    """
    try:
        vendor_name = request.get('vendor_name')
        vendor_cvr = request.get('vendor_cvr')
        vendor_address = request.get('vendor_address')
        create_if_missing = request.get('create_if_missing', False)

        # 1) Exact CVR match - unambiguous, return the single hit directly.
        if vendor_cvr:
            vendor = execute_query(
                "SELECT id, name, cvr_number FROM vendors WHERE cvr_number = %s",
                (vendor_cvr,),
                fetchone=True
            )
            if vendor:
                return {
                    "found": True,
                    "vendor_id": vendor['id'],
                    "vendor_name": vendor['name'],
                    "source": "cvr_match"
                }

        # 2) Fuzzy name search - may return several candidates for the caller
        #    to choose between.
        if vendor_name:
            vendors = execute_query(
                "SELECT id, name, cvr_number FROM vendors WHERE LOWER(name) LIKE LOWER(%s) LIMIT 5",
                (f"%{vendor_name}%",)
            )
            if vendors:
                return {
                    "found": True,
                    "matches": vendors,
                    "source": "name_search",
                    "message": "Flere mulige matches - vælg en eller opret ny"
                }

        # 3) Create a brand-new vendor on request.
        #    NOTE: uses the module-level `settings` import; the previous
        #    function-local re-import was redundant.
        if create_if_missing and vendor_name:
            # Guard: never register our own company as a supplier.
            if vendor_cvr and settings.OWN_CVR in vendor_cvr:
                raise HTTPException(
                    status_code=400,
                    detail=f"Kan ikke oprette vendor med eget CVR ({settings.OWN_CVR})"
                )

            new_vendor_id = execute_insert(
                """INSERT INTO vendors (name, cvr_number, address, created_at)
                   VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""",
                (vendor_name, vendor_cvr, vendor_address)
            )

            logger.info(f"✅ Created new vendor: {vendor_name} (ID: {new_vendor_id})")

            return {
                "found": False,
                "created": True,
                "vendor_id": new_vendor_id,
                "vendor_name": vendor_name,
                "source": "newly_created"
            }

        return {
            "found": False,
            "message": "Ingen vendor fundet - angiv create_if_missing=true for at oprette"
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Vendor search failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/ai/analyze")
async def ai_analyze_invoice(request: Dict):
    """
    Use the local LLM (Ollama) to analyse raw invoice text and extract the
    fields needed for template generation (invoice number, date, total, CVR,
    detection patterns and line-item boundaries).

    Request body: {"pdf_text": "<raw text extracted from the PDF>"}

    Raises:
        400: no PDF text supplied.
        500: the model returned nothing or the call failed.
    """
    try:
        pdf_text = request.get('pdf_text', '')

        if not pdf_text:
            raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet")

        # Build the extraction prompt. rf-string: raw so the literal regex
        # `[\d.,]+` in the instructions is not an (invalid) escape sequence,
        # f so OWN_CVR and the truncated PDF text are interpolated.
        # Uses the module-level `settings` import (the previous local
        # re-import was redundant).
        enhanced_text = rf"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering.

RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON!

REQUIRED STRUKTUR (alle felter skal med):
{{
    "invoice_number": "5082481",
    "invoice_date": "24/10-25",
    "total_amount": "1471.20",
    "cvr": "29522790",
    "detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"],
    "lines_start": "Nr.VarenrTekst",
    "lines_end": "Subtotal"
}}

FIND FØLGENDE:
1. invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice")
2. invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY)
3. total_amount: Total beløb
   - Søg efter "Total", "I alt", "Totalbeløb"
   - Hvis beløbet er på næste linje, match sidste tal
   - Format: [\d.,]+ (f.eks. 1.471,20 eller 1471.20)
4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK")
   - IGNORER CVR {settings.OWN_CVR} - dette er KØBERS CVR, ikke leverandør!
   - Find LEVERANDØRENS CVR (normalt i toppen/header)
5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren
   - Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S")
   - Website eller email (f.eks. "WWW.DCS.DK")
   - Adresse element (f.eks. "Høgemosevænget", "Mårkærvej")
   - UNDGÅ generiske ord som "Faktura", "Danmark", "Side"
6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr")
7. lines_end: Tekst EFTER varelinjer (f.eks. "Subtotal", "I alt", "Side 1 af")

VIGTIGT:
- detection_patterns SKAL være mindst 3 specifikke tekststrenge
- Vælg tekststrenge der er UNIKKE for denne leverandør
- CVR SKAL være leverandørens - IKKE {settings.OWN_CVR} (det er køber)
- LAD VÆRE med at lave patterns eller line_item regex - kun udtræk rå data

PDF TEKST:
{pdf_text[:2000]}

RETURNER KUN JSON - intet andet!"""

        # Call Ollama
        logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst")
        result = await ollama_service.extract_from_text(enhanced_text)

        if not result:
            raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen")

        logger.info(f"✅ AI analyse gennemført: {result}")
        return result

    except HTTPException:
        # BUGFIX: previously the blanket Exception handler swallowed the
        # deliberate 400/500 HTTPExceptions above and re-raised them as a
        # generic 500 - re-raise them untouched like every other endpoint.
        raise
    except Exception as e:
        logger.error(f"❌ AI analyse fejlede: {e}")
        raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}")
|
||
|
||
|
||
@router.post("/supplier-invoices/templates")
async def create_template(request: Dict):
    r"""
    Create a new supplier-invoice extraction template.

    Request body:
    {
        "vendor_id": 1,
        "template_name": "Test Template",
        "detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}],
        "field_mappings": {"invoice_number": {"pattern": "Nummer\s*(\d+)", "group": 1}}
    }

    Returns {"template_id": ..., "message": ...}.
    Raises 400 when required fields are missing or the mapped CVR is our own
    company's, 500 when the insert fails.
    """
    try:
        import json
        from app.core.config import settings

        vendor_id = request.get('vendor_id')
        template_name = request.get('template_name')
        detection_patterns = request.get('detection_patterns', [])
        field_mappings = request.get('field_mappings', {})
        default_product_category = request.get('default_product_category', 'varesalg')

        if not vendor_id or not template_name:
            raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet")

        # Validate that the mapped vendor CVR is not our own company's -
        # that would mean buyer and supplier got mixed up.
        vendor_cvr_mapping = field_mappings.get('vendor_cvr', {})
        if vendor_cvr_mapping:
            # The CVR may be given as a fixed 'value' or embedded in a 'pattern'.
            cvr_value = vendor_cvr_mapping.get('value') or vendor_cvr_mapping.get('pattern', '')
            if settings.OWN_CVR in str(cvr_value):
                raise HTTPException(
                    status_code=400,
                    detail=f"CVR {cvr_value} matcher egen virksomhed ({settings.OWN_CVR}). Brug leverandørens CVR, ikke købers!"
                )

        # Insert the template; RETURNING gives us the generated template_id.
        # Patterns/mappings are serialised to JSON for the JSON columns.
        query = """
            INSERT INTO supplier_invoice_templates
            (vendor_id, template_name, detection_patterns, field_mappings, default_product_category)
            VALUES (%s, %s, %s, %s, %s)
            RETURNING template_id
        """
        result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings), default_product_category))
        template_id = result[0]['template_id'] if result else None

        if not template_id:
            raise HTTPException(status_code=500, detail="Kunne ikke oprette template")

        # Refresh the in-memory template cache so the new template is
        # matched immediately on the next upload.
        template_service.reload_templates()

        logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}")
        return {"template_id": template_id, "message": "Template oprettet"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to create template: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/{invoice_id}")
async def get_supplier_invoice(invoice_id: int):
    """Return one supplier invoice enriched with vendor data and its line items."""
    try:
        header_sql = """SELECT si.*, v.name as vendor_full_name, v.economic_supplier_number as vendor_economic_id
               FROM supplier_invoices si
               LEFT JOIN vendors v ON si.vendor_id = v.id
               WHERE si.id = %s"""
        invoice = execute_query(header_sql, (invoice_id,), fetchone=True)

        if not invoice:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # Attach the ordered line items directly onto the response dict.
        invoice['lines'] = execute_query(
            "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
            (invoice_id,)
        )
        return invoice

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get supplier invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices")
async def create_supplier_invoice(data: Dict):
    """
    Create new supplier invoice

    Required fields:
    - invoice_number: str
    - vendor_id: int
    - invoice_date: str (YYYY-MM-DD)
    - total_amount: float

    Optional fields:
    - due_date: str (YYYY-MM-DD) - defaults to invoice_date + 30 days
    - vat_amount: float
    - net_amount: float
    - currency: str (default 'DKK')
    - description: str
    - notes: str
    - invoice_type: str ('invoice' or 'credit_note'; anything else falls back to 'invoice')
    - lines: List[Dict] with line items

    Returns {"success", "invoice_id", "invoice_number", "due_date"}.
    Raises 400 when a required field is missing.
    """
    try:
        # Validate required fields up front so we fail before touching the DB.
        required = ['invoice_number', 'vendor_id', 'invoice_date', 'total_amount']
        missing = [f for f in required if f not in data]
        if missing:
            raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}")

        # Calculate due_date if not provided (30 days default).
        invoice_date = datetime.fromisoformat(data['invoice_date'])
        due_date = data.get('due_date')
        if not due_date:
            due_date = (invoice_date + timedelta(days=30)).strftime('%Y-%m-%d')

        # Determine invoice type; unknown values silently fall back to 'invoice'.
        invoice_type = data.get('invoice_type', 'invoice')
        if invoice_type not in ['invoice', 'credit_note']:
            invoice_type = 'invoice'

        # Insert the invoice header. Credit notes are born in status
        # 'credited'; regular invoices start out 'pending'.
        invoice_id = execute_insert(
            """INSERT INTO supplier_invoices
               (invoice_number, vendor_id, vendor_name, invoice_date, due_date,
                total_amount, vat_amount, net_amount, currency, description, notes,
                status, created_by, invoice_type)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            (
                data['invoice_number'],
                data['vendor_id'],
                data.get('vendor_name'),
                data['invoice_date'],
                due_date,
                data['total_amount'],
                data.get('vat_amount', 0),
                data.get('net_amount', data['total_amount']),  # net defaults to gross when not supplied
                data.get('currency', 'DKK'),
                data.get('description'),
                data.get('notes'),
                'credited' if invoice_type == 'credit_note' else 'pending',
                data.get('created_by'),
                invoice_type
            )
        )

        # Insert line items, if any. Line numbers default to their 1-based
        # position in the request when not given explicitly.
        if data.get('lines'):
            for idx, line in enumerate(data['lines'], start=1):
                # Map vat_code: I52 for reverse charge, I25 for standard.
                vat_code = line.get('vat_code', 'I25')

                execute_insert(
                    """INSERT INTO supplier_invoice_lines
                       (supplier_invoice_id, line_number, description, quantity, unit_price,
                        line_total, vat_code, vat_rate, vat_amount, contra_account, sku)
                       VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                    (
                        invoice_id,
                        line.get('line_number', idx),
                        line.get('description'),
                        line.get('quantity', 1),
                        line.get('unit_price', 0),
                        line.get('line_total', 0),
                        vat_code,
                        line.get('vat_rate', 25.00),
                        line.get('vat_amount', 0),
                        line.get('contra_account', '5810'),  # default purchase contra account
                        line.get('sku')
                    )
                )

        logger.info(f"✅ Created supplier invoice: {data['invoice_number']} (ID: {invoice_id})")

        return {
            "success": True,
            "invoice_id": invoice_id,
            "invoice_number": data['invoice_number'],
            "due_date": due_date
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to create supplier invoice: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.put("/supplier-invoices/{invoice_id}")
async def update_supplier_invoice(invoice_id: int, data: Dict):
    """Partially update a supplier invoice; only whitelisted fields are applied."""
    try:
        current = execute_query(
            "SELECT id, status FROM supplier_invoices WHERE id = %s",
            (invoice_id,),
            fetchone=True
        )
        if not current:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # Invoices already booked in e-conomic are frozen.
        if current['status'] == 'sent_to_economic':
            raise HTTPException(
                status_code=400,
                detail="Cannot edit invoice that has been sent to e-conomic"
            )

        allowed_fields = ['invoice_number', 'vendor_id', 'vendor_name', 'invoice_date',
                          'due_date', 'total_amount', 'vat_amount', 'net_amount',
                          'currency', 'description', 'notes', 'status']

        # Only whitelisted keys reach the SET clause, so the dynamic SQL
        # below cannot be injected through field names; values go through
        # parameter binding.
        changes = [(field, data[field]) for field in allowed_fields if field in data]
        if not changes:
            raise HTTPException(status_code=400, detail="No fields to update")

        set_clause = ', '.join(f"{field} = %s" for field, _ in changes)
        values = [value for _, value in changes] + [invoice_id]

        execute_update(
            f"""
            UPDATE supplier_invoices
            SET {set_clause}, updated_at = CURRENT_TIMESTAMP
            WHERE id = %s
            """,
            tuple(values)
        )

        logger.info(f"✅ Updated supplier invoice {invoice_id}")
        return {"success": True, "invoice_id": invoice_id}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update supplier invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.delete("/supplier-invoices/{invoice_id}")
async def delete_supplier_invoice(invoice_id: int):
    """Delete a supplier invoice; invoices already booked in e-conomic are only cancelled."""
    try:
        invoice = execute_query(
            "SELECT id, invoice_number, economic_voucher_number FROM supplier_invoices WHERE id = %s",
            (invoice_id,),
            fetchone=True
        )
        if not invoice:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # A voucher number means the invoice exists in e-conomic: keep the
        # local record for the audit trail and just flip its status.
        if invoice.get('economic_voucher_number'):
            execute_update(
                "UPDATE supplier_invoices SET status = 'cancelled', updated_at = CURRENT_TIMESTAMP WHERE id = %s",
                (invoice_id,)
            )
            logger.info(f"⚠️ Marked supplier invoice {invoice['invoice_number']} as cancelled (sent to e-conomic)")
            return {"success": True, "message": "Invoice marked as cancelled", "invoice_id": invoice_id}

        # Hard delete: lines first (FK dependency), then the header row.
        execute_update("DELETE FROM supplier_invoice_lines WHERE supplier_invoice_id = %s", (invoice_id,))
        execute_update("DELETE FROM supplier_invoices WHERE id = %s", (invoice_id,))

        logger.info(f"🗑️ Deleted supplier invoice {invoice['invoice_number']} (ID: {invoice_id})")
        return {"success": True, "message": "Invoice deleted", "invoice_id": invoice_id}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete supplier invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ========== E-CONOMIC INTEGRATION ==========
|
||
|
||
@router.post("/supplier-invoices/{invoice_id}/approve")
async def approve_supplier_invoice(invoice_id: int, approved_by: str):
    """Mark a pending supplier invoice as approved for payment."""
    try:
        row = execute_query(
            "SELECT id, invoice_number, status FROM supplier_invoices WHERE id = %s",
            (invoice_id,),
            fetchone=True
        )
        if not row:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # Approval is a one-way transition allowed from 'pending' only.
        if row['status'] != 'pending':
            raise HTTPException(status_code=400, detail=f"Invoice is already {row['status']}")

        execute_update(
            """UPDATE supplier_invoices
               SET status = 'approved', approved_by = %s, approved_at = CURRENT_TIMESTAMP
               WHERE id = %s""",
            (approved_by, invoice_id)
        )

        logger.info(f"✅ Approved supplier invoice {row['invoice_number']} by {approved_by}")
        return {"success": True, "invoice_id": invoice_id, "approved_by": approved_by}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to approve invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/{invoice_id}/send-to-economic")
async def send_to_economic(invoice_id: int):
    """
    Send approved supplier invoice to e-conomic kassekladde.
    Creates voucher entry in e-conomic journals.

    Flow:
    1. Load the invoice (incl. the vendor's e-conomic supplier number) and its lines.
    2. Reject unless status is 'approved' and it has not been sent before.
    3. Ensure the vendor exists in e-conomic (search by name, create if missing).
    4. Aggregate the lines into a per-VAT-code breakdown.
    5. Create the journal voucher in e-conomic and persist its identifiers locally.
    6. Best-effort: attach the original PDF to the voucher.
    """
    try:
        # Get invoice with lines
        invoice = execute_query(
            """SELECT si.*, v.economic_supplier_number as vendor_economic_id, v.name as vendor_full_name
               FROM supplier_invoices si
               LEFT JOIN vendors v ON si.vendor_id = v.id
               WHERE si.id = %s""",
            (invoice_id,),
            fetchone=True
        )

        if not invoice:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        if invoice['status'] != 'approved':
            raise HTTPException(status_code=400, detail="Invoice must be approved before sending to e-conomic")

        # An existing voucher number means it was already transferred - never send twice.
        if invoice.get('economic_voucher_number'):
            raise HTTPException(status_code=400, detail="Invoice already sent to e-conomic")

        # Get lines
        lines = execute_query(
            "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
            (invoice_id,)
        )

        if not lines:
            raise HTTPException(status_code=400, detail="Invoice must have at least one line item")

        # Check if vendor exists in e-conomic
        economic = get_economic_service()

        vendor_economic_id = invoice.get('vendor_economic_id')

        # If vendor not in e-conomic, try a name lookup first and only create
        # a new supplier when no match is found.
        if not vendor_economic_id:
            vendor_result = await economic.search_supplier_by_name(invoice.get('vendor_full_name') or invoice.get('vendor_name'))

            if vendor_result:
                vendor_economic_id = vendor_result['supplierNumber']
                # Cache the supplier number on the local vendor record so the
                # lookup is skipped next time.
                execute_update(
                    "UPDATE vendors SET economic_supplier_number = %s WHERE id = %s",
                    (vendor_economic_id, invoice['vendor_id'])
                )
            else:
                # Create new supplier in e-conomic
                new_supplier = await economic.create_supplier({
                    'name': invoice.get('vendor_full_name') or invoice.get('vendor_name'),
                    'currency': invoice.get('currency', 'DKK')
                })

                if new_supplier and new_supplier.get('supplierNumber'):
                    vendor_economic_id = new_supplier['supplierNumber']
                else:
                    raise HTTPException(status_code=500, detail="Failed to create supplier in e-conomic")

        # Get default journal number from settings (falls back to journal 1).
        journal_setting = execute_query(
            "SELECT setting_value FROM supplier_invoice_settings WHERE setting_key = 'economic_default_journal'",
            fetchone=True
        )
        journal_number = int(journal_setting['setting_value']) if journal_setting else 1

        # Build VAT breakdown from lines: net/vat/gross totals accumulated
        # per VAT code, plus a flat list of line items for the voucher.
        vat_breakdown = {}
        line_items = []

        for line in lines:
            vat_code = line.get('vat_code', 'I25')

            if vat_code not in vat_breakdown:
                vat_breakdown[vat_code] = {
                    'net': 0,
                    'vat': 0,
                    'gross': 0,
                    'rate': line.get('vat_rate', 25.00)
                }

            line_total = float(line.get('line_total', 0))
            vat_amount = float(line.get('vat_amount', 0))
            net_amount = line_total - vat_amount

            vat_breakdown[vat_code]['net'] += net_amount
            vat_breakdown[vat_code]['vat'] += vat_amount
            vat_breakdown[vat_code]['gross'] += line_total

            line_items.append({
                'description': line.get('description'),
                'quantity': float(line.get('quantity', 1)),
                'unit_price': float(line.get('unit_price', 0)),
                'line_total': line_total,
                'vat_code': vat_code,
                'vat_amount': vat_amount,
                'contra_account': line.get('contra_account', '5810'),
                'sku': line.get('sku')
            })

        # Send to e-conomic. Dates may come back from the DB as date objects
        # or as strings, so normalise to ISO strings only when needed.
        result = await economic.create_journal_supplier_invoice(
            journal_number=journal_number,
            supplier_number=vendor_economic_id,
            invoice_number=invoice['invoice_number'],
            invoice_date=invoice['invoice_date'].isoformat() if isinstance(invoice['invoice_date'], date) else invoice['invoice_date'],
            total_amount=float(invoice['total_amount']),
            vat_breakdown=vat_breakdown,
            line_items=line_items,
            due_date=invoice['due_date'].isoformat() if invoice.get('due_date') and isinstance(invoice['due_date'], date) else invoice.get('due_date'),
            text=invoice.get('description') or f"Supplier invoice {invoice['invoice_number']}"
        )

        if result.get('error'):
            raise HTTPException(status_code=500, detail=result.get('message', 'Failed to create voucher in e-conomic'))

        # Persist the e-conomic identifiers so the invoice is marked as sent
        # and the voucher can be located later.
        execute_update(
            """UPDATE supplier_invoices
               SET status = 'sent_to_economic',
                   economic_supplier_number = %s,
                   economic_journal_number = %s,
                   economic_voucher_number = %s,
                   economic_accounting_year = %s,
                   sent_to_economic_at = CURRENT_TIMESTAMP
               WHERE id = %s""",
            (
                vendor_economic_id,
                result['journal_number'],
                result['voucher_number'],
                result['accounting_year'],
                invoice_id
            )
        )

        # Upload the original PDF as a voucher attachment when we have one
        # on disk; failure here is non-fatal (the voucher already exists).
        if invoice.get('file_path') and os.path.exists(invoice['file_path']):
            attachment_result = await economic.upload_voucher_attachment(
                journal_number=result['journal_number'],
                accounting_year=result['accounting_year'],
                voucher_number=result['voucher_number'],
                pdf_path=invoice['file_path'],
                filename=f"{invoice['invoice_number']}.pdf"
            )

            if attachment_result.get('success'):
                logger.info(f"📎 Uploaded attachment for voucher {result['voucher_number']}")

        logger.info(f"✅ Sent supplier invoice {invoice['invoice_number']} to e-conomic (voucher #{result['voucher_number']})")

        return {
            "success": True,
            "invoice_id": invoice_id,
            "voucher_number": result['voucher_number'],
            "journal_number": result['journal_number'],
            "accounting_year": result['accounting_year']
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to send invoice {invoice_id} to e-conomic: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/economic/journals")
async def get_economic_journals():
    """List the e-conomic journals (kassekladder) available for supplier invoices."""
    try:
        service = get_economic_service()
        return {"journals": await service.get_supplier_invoice_journals()}
    except Exception as e:
        logger.error(f"❌ Failed to get e-conomic journals: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ========== STATISTICS & REPORTS ==========
|
||
|
||
@router.get("/supplier-invoices/stats/overview")
async def get_payment_overview():
    """
    Get overview of supplier invoices payment status.

    Returns counts and amounts for total, paid, overdue, due-soon (within 7
    days) and pending invoices. Cancelled invoices are excluded.
    """
    try:
        today = date.today().isoformat()

        stats = execute_query("""
            SELECT
                COUNT(*) as total_count,
                SUM(CASE WHEN paid_date IS NOT NULL THEN 1 ELSE 0 END) as paid_count,
                SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN 1 ELSE 0 END) as overdue_count,
                SUM(CASE WHEN paid_date IS NULL AND due_date >= %s AND due_date <= (%s::date + INTERVAL '7 days') THEN 1 ELSE 0 END) as due_soon_count,
                SUM(CASE WHEN paid_date IS NULL AND (due_date IS NULL OR due_date > (%s::date + INTERVAL '7 days')) THEN 1 ELSE 0 END) as pending_count,
                SUM(total_amount) as total_amount,
                SUM(CASE WHEN paid_date IS NOT NULL THEN total_amount ELSE 0 END) as paid_amount,
                SUM(CASE WHEN paid_date IS NULL THEN total_amount ELSE 0 END) as unpaid_amount,
                SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN total_amount ELSE 0 END) as overdue_amount
            FROM supplier_invoices
            WHERE status != 'cancelled'
        """, (today, today, today, today, today), fetchone=True) or {}

        # BUGFIX: SUM() over zero rows yields SQL NULL (Python None). The old
        # `stats.get(key, 0)` only defaults on a *missing* key, so on an empty
        # table the count fields came back as None. Coerce with `or 0` for
        # every aggregate, matching what the amount fields already did.
        def as_int(key: str) -> int:
            return int(stats.get(key) or 0)

        def as_float(key: str) -> float:
            return float(stats.get(key) or 0)

        return {
            "total_invoices": as_int('total_count'),
            "paid_count": as_int('paid_count'),
            "overdue_count": as_int('overdue_count'),
            "due_soon_count": as_int('due_soon_count'),
            "pending_count": as_int('pending_count'),
            "total_amount": as_float('total_amount'),
            "paid_amount": as_float('paid_amount'),
            "unpaid_amount": as_float('unpaid_amount'),
            "overdue_amount": as_float('overdue_amount')
        }

    except Exception as e:
        logger.error(f"❌ Failed to get payment overview: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/stats/by-vendor")
async def get_stats_by_vendor():
    """Aggregate supplier-invoice counts and amounts per vendor, largest unpaid first."""
    try:
        # Vendors without any (non-cancelled) invoices are dropped by the
        # HAVING clause, so only active supplier relationships are listed.
        rows = execute_query("""
            SELECT
                v.id as vendor_id,
                v.name as vendor_name,
                COUNT(si.id) as invoice_count,
                SUM(si.total_amount) as total_amount,
                SUM(CASE WHEN si.paid_date IS NULL THEN si.total_amount ELSE 0 END) as unpaid_amount,
                MAX(si.due_date) as latest_due_date
            FROM vendors v
            LEFT JOIN supplier_invoices si ON v.id = si.vendor_id
            WHERE si.status != 'cancelled' OR si.status IS NULL
            GROUP BY v.id, v.name
            HAVING COUNT(si.id) > 0
            ORDER BY unpaid_amount DESC
        """)
        return {"vendor_stats": rows}

    except Exception as e:
        logger.error(f"❌ Failed to get vendor stats: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
# ========== UPLOAD & AI EXTRACTION ==========
|
||
|
||
@router.post("/supplier-invoices/upload")
|
||
async def upload_supplier_invoice(file: UploadFile = File(...)):
|
||
"""
|
||
Upload supplier invoice (PDF/image) and extract data using templates
|
||
|
||
Process:
|
||
1. Validate file type and size
|
||
2. Calculate SHA256 checksum for duplicate detection
|
||
3. Save file to uploads directory
|
||
4. Extract text (PDF/OCR)
|
||
5. Match template based on PDF content
|
||
6. Extract fields using template regex patterns
|
||
7. Show form with pre-filled data for user review
|
||
|
||
Returns:
|
||
{
|
||
"status": "success|duplicate|needs_review",
|
||
"file_id": int,
|
||
"template_matched": bool,
|
||
"template_id": int,
|
||
"extracted_fields": dict,
|
||
"confidence": float,
|
||
"pdf_text": str # For manual review
|
||
}
|
||
"""
|
||
from app.core.config import settings
|
||
|
||
try:
|
||
# Validate file extension
|
||
suffix = Path(file.filename).suffix.lower()
|
||
if suffix not in settings.ALLOWED_EXTENSIONS:
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"Filtype {suffix} ikke tilladt. Tilladte: {', '.join(settings.ALLOWED_EXTENSIONS)}"
|
||
)
|
||
|
||
# Create upload directory
|
||
upload_dir = Path(settings.UPLOAD_DIR)
|
||
upload_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
# Save file temporarily to calculate checksum
|
||
temp_path = upload_dir / f"temp_{datetime.now().timestamp()}_{file.filename}"
|
||
|
||
try:
|
||
# Validate file size while saving
|
||
max_size = settings.MAX_FILE_SIZE_MB * 1024 * 1024
|
||
total_size = 0
|
||
|
||
with open(temp_path, "wb") as buffer:
|
||
while chunk := await file.read(8192):
|
||
total_size += len(chunk)
|
||
if total_size > max_size:
|
||
temp_path.unlink(missing_ok=True)
|
||
raise HTTPException(
|
||
status_code=413,
|
||
detail=f"Fil for stor (max {settings.MAX_FILE_SIZE_MB}MB)"
|
||
)
|
||
buffer.write(chunk)
|
||
|
||
logger.info(f"📥 Uploaded file: {file.filename} ({total_size} bytes)")
|
||
|
||
# Calculate SHA256 checksum
|
||
checksum = ollama_service.calculate_file_checksum(temp_path)
|
||
|
||
# Check for duplicate file
|
||
existing_file = execute_query(
|
||
"SELECT file_id, status FROM incoming_files WHERE checksum = %s",
|
||
(checksum,),
|
||
fetchone=True
|
||
)
|
||
|
||
if existing_file:
|
||
temp_path.unlink(missing_ok=True)
|
||
logger.warning(f"⚠️ Duplicate file detected: {checksum[:16]}...")
|
||
|
||
# Get existing invoice if linked
|
||
existing_invoice = execute_query(
|
||
"""SELECT si.* FROM supplier_invoices si
|
||
JOIN extractions e ON si.extraction_id = e.extraction_id
|
||
WHERE e.file_id = %s""",
|
||
(existing_file['file_id'],),
|
||
fetchone=True
|
||
)
|
||
|
||
return {
|
||
"status": "duplicate",
|
||
"message": "Denne fil er allerede uploadet",
|
||
"file_id": existing_file['file_id'],
|
||
"invoice_id": existing_invoice['id'] if existing_invoice else None
|
||
}
|
||
|
||
# Rename to permanent name
|
||
final_path = upload_dir / file.filename
|
||
counter = 1
|
||
while final_path.exists():
|
||
final_path = upload_dir / f"{final_path.stem}_{counter}{final_path.suffix}"
|
||
counter += 1
|
||
|
||
temp_path.rename(final_path)
|
||
logger.info(f"💾 Saved file as: {final_path.name}")
|
||
|
||
# Insert file record
|
||
file_record = execute_query(
|
||
"""INSERT INTO incoming_files
|
||
(filename, original_filename, file_path, file_size, mime_type, checksum, status)
|
||
VALUES (%s, %s, %s, %s, %s, %s, 'processing') RETURNING file_id""",
|
||
(final_path.name, file.filename, str(final_path), total_size,
|
||
ollama_service._get_mime_type(final_path), checksum),
|
||
fetchone=True
|
||
)
|
||
file_id = file_record['file_id']
|
||
|
||
# Extract text from file
|
||
logger.info(f"📄 Extracting text from {final_path.suffix}...")
|
||
text = await ollama_service._extract_text_from_file(final_path)
|
||
|
||
# QUICK ANALYSIS: Extract CVR, document type, invoice number IMMEDIATELY
|
||
logger.info(f"⚡ Running quick analysis...")
|
||
quick_result = await ollama_service.quick_analysis_on_upload(text)
|
||
|
||
# Update file record with quick analysis results
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET detected_cvr = %s,
|
||
detected_vendor_id = %s,
|
||
detected_document_type = %s,
|
||
detected_document_number = %s,
|
||
is_own_invoice = %s
|
||
WHERE file_id = %s""",
|
||
(quick_result.get('cvr'),
|
||
quick_result.get('vendor_id'),
|
||
quick_result.get('document_type'),
|
||
quick_result.get('document_number'),
|
||
quick_result.get('is_own_invoice', False),
|
||
file_id)
|
||
)
|
||
|
||
logger.info(f"📋 Quick analysis saved: CVR={quick_result.get('cvr')}, "
|
||
f"Vendor={quick_result.get('vendor_name')}, "
|
||
f"Type={quick_result.get('document_type')}, "
|
||
f"Number={quick_result.get('document_number')}")
|
||
|
||
# DUPLICATE CHECK: Check if invoice number already exists
|
||
document_number = quick_result.get('document_number')
|
||
if document_number:
|
||
logger.info(f"🔍 Checking for duplicate invoice number: {document_number}")
|
||
|
||
# Check 1: Search in local database (supplier_invoices table)
|
||
existing_invoice = execute_query(
|
||
"""SELECT si.id, si.invoice_number, si.created_at, v.name as vendor_name
|
||
FROM supplier_invoices si
|
||
LEFT JOIN vendors v ON v.id = si.vendor_id
|
||
WHERE si.invoice_number = %s
|
||
ORDER BY si.created_at DESC
|
||
LIMIT 1""",
|
||
(document_number,),
|
||
fetchone=True
|
||
)
|
||
|
||
if existing_invoice:
|
||
# DUPLICATE FOUND IN DATABASE
|
||
logger.error(f"🚫 DUPLICATE: Invoice {document_number} already exists in database (ID: {existing_invoice['id']})")
|
||
|
||
# Mark file as duplicate
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'duplicate',
|
||
error_message = %s,
|
||
processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(f"DUBLET: Fakturanummer {document_number} findes allerede i systemet (Faktura #{existing_invoice['id']}, {existing_invoice['vendor_name'] or 'Ukendt leverandør'})",
|
||
file_id)
|
||
)
|
||
|
||
raise HTTPException(
|
||
status_code=409, # 409 Conflict
|
||
detail=f"🚫 DUBLET: Fakturanummer {document_number} findes allerede i systemet (Faktura #{existing_invoice['id']}, oprettet {existing_invoice['created_at'].strftime('%d-%m-%Y')})"
|
||
)
|
||
|
||
# Check 2: Search in e-conomic (if configured)
|
||
from app.services.economic_service import economic_service
|
||
if hasattr(economic_service, 'app_secret_token') and economic_service.app_secret_token:
|
||
logger.info(f"🔍 Checking e-conomic for invoice number: {document_number}")
|
||
economic_duplicate = await economic_service.check_invoice_number_exists(document_number)
|
||
|
||
if economic_duplicate:
|
||
# DUPLICATE FOUND IN E-CONOMIC
|
||
logger.error(f"🚫 DUPLICATE: Invoice {document_number} found in e-conomic (Voucher #{economic_duplicate.get('voucher_number')})")
|
||
|
||
# Mark file as duplicate
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'duplicate',
|
||
error_message = %s,
|
||
processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(f"DUBLET: Fakturanummer {document_number} findes i e-conomic (Bilag #{economic_duplicate.get('voucher_number')})",
|
||
file_id)
|
||
)
|
||
|
||
raise HTTPException(
|
||
status_code=409, # 409 Conflict
|
||
detail=f"🚫 DUBLET: Fakturanummer {document_number} findes i e-conomic (Bilag #{economic_duplicate.get('voucher_number')}, {economic_duplicate.get('date')})"
|
||
)
|
||
|
||
logger.info(f"✅ No duplicate found for invoice {document_number}")
|
||
|
||
# Try template matching
|
||
logger.info(f"📋 Matching template...")
|
||
template_id, confidence = template_service.match_template(text)
|
||
|
||
extracted_fields = {}
|
||
vendor_id = None
|
||
|
||
if template_id and confidence >= 0.5:
|
||
# Extract fields using template
|
||
logger.info(f"✅ Using template {template_id} ({confidence:.0%} confidence)")
|
||
extracted_fields = template_service.extract_fields(text, template_id)
|
||
|
||
# Get vendor from template
|
||
template = template_service.templates_cache.get(template_id)
|
||
if template:
|
||
vendor_id = template.get('vendor_id')
|
||
|
||
# Save extraction to database
|
||
import json
|
||
extraction_id = execute_insert(
|
||
"""INSERT INTO extractions
|
||
(file_id, vendor_matched_id, document_id, document_date, due_date,
|
||
total_amount, currency, document_type, confidence, llm_response_json, status)
|
||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'extracted')""",
|
||
(file_id, vendor_id,
|
||
extracted_fields.get('invoice_number'),
|
||
extracted_fields.get('invoice_date'),
|
||
extracted_fields.get('due_date'),
|
||
extracted_fields.get('total_amount'),
|
||
extracted_fields.get('currency', 'DKK'),
|
||
extracted_fields.get('document_type'),
|
||
confidence,
|
||
json.dumps(extracted_fields))
|
||
)
|
||
|
||
# Insert line items if extracted
|
||
if extracted_fields.get('lines'):
|
||
for idx, line in enumerate(extracted_fields['lines'], start=1):
|
||
execute_insert(
|
||
"""INSERT INTO extraction_lines
|
||
(extraction_id, line_number, description, quantity, unit_price,
|
||
line_total, vat_rate, vat_note, confidence)
|
||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
RETURNING line_id""",
|
||
(extraction_id, idx, line.get('description'),
|
||
line.get('quantity'), line.get('unit_price'),
|
||
line.get('line_total'), line.get('vat_rate'),
|
||
line.get('vat_note'), confidence)
|
||
)
|
||
|
||
# Log usage
|
||
template_service.log_usage(template_id, file_id, True, confidence, extracted_fields)
|
||
|
||
# Update file record
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(template_id, file_id)
|
||
)
|
||
else:
|
||
# NO AI FALLBACK - Require template
|
||
logger.warning(f"⚠️ No template matched (confidence: {confidence:.0%}) - rejecting file")
|
||
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'failed',
|
||
error_message = 'Ingen template match - opret template for denne leverandør',
|
||
processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(file_id,)
|
||
)
|
||
|
||
raise HTTPException(
|
||
status_code=400,
|
||
detail=f"Ingen template match ({confidence:.0%} confidence) - opret template for denne leverandør"
|
||
)
|
||
|
||
# Return data for user to review and confirm
|
||
return {
|
||
"status": "needs_review",
|
||
"file_id": file_id,
|
||
"template_matched": template_id is not None,
|
||
"template_id": template_id,
|
||
"vendor_id": vendor_id,
|
||
"confidence": confidence,
|
||
"extracted_fields": extracted_fields,
|
||
"pdf_text": text[:500], # First 500 chars for reference
|
||
# Quick analysis results (available IMMEDIATELY on upload)
|
||
"quick_analysis": {
|
||
"cvr": quick_result.get('cvr'),
|
||
"vendor_id": quick_result.get('vendor_id'),
|
||
"vendor_name": quick_result.get('vendor_name'),
|
||
"document_type": quick_result.get('document_type'),
|
||
"document_number": quick_result.get('document_number')
|
||
},
|
||
"message": "Upload gennemført - gennemgå og bekræft data"
|
||
}
|
||
|
||
except HTTPException as he:
|
||
# Mark file as failed if we have file_id
|
||
if 'file_id' in locals():
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'failed',
|
||
error_message = %s,
|
||
processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(str(he.detail), file_id)
|
||
)
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Upload failed (inner): {e}", exc_info=True)
|
||
# Mark file as failed if we have file_id
|
||
if 'file_id' in locals():
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'failed',
|
||
error_message = %s,
|
||
processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(str(e), file_id)
|
||
)
|
||
raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Upload failed (outer): {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
|
||
|
||
|
||
# ========== ECONOMIC SYNC ==========
|
||
|
||
|
||
|
||
@router.post("/supplier-invoices/{invoice_id}/send-to-economic")
|
||
async def send_invoice_to_economic(invoice_id: int):
|
||
"""Send supplier invoice to e-conomic - requires separate implementation"""
|
||
raise HTTPException(status_code=501, detail="e-conomic integration kommer senere")
|
||
|
||
|
||
@router.post("/supplier-invoices/reprocess/{file_id}")
|
||
async def reprocess_uploaded_file(file_id: int):
|
||
"""
|
||
Genbehandl en uploadet fil med template matching
|
||
Bruges til at behandle filer der fejlede eller ikke blev færdigbehandlet
|
||
"""
|
||
import json
|
||
from datetime import datetime, timedelta
|
||
|
||
try:
|
||
# Get file record
|
||
file_record = execute_query(
|
||
"SELECT * FROM incoming_files WHERE file_id = %s",
|
||
(file_id,),
|
||
fetchone=True
|
||
)
|
||
|
||
if not file_record:
|
||
raise HTTPException(status_code=404, detail=f"Fil {file_id} ikke fundet")
|
||
|
||
file_path = Path(file_record['file_path'])
|
||
if not file_path.exists():
|
||
raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}")
|
||
|
||
logger.info(f"<EFBFBD><EFBFBD> Genbehandler fil {file_id}: {file_record['filename']}")
|
||
|
||
# Extract text from file
|
||
text = await ollama_service._extract_text_from_file(file_path)
|
||
|
||
# Try template matching
|
||
template_id, confidence = template_service.match_template(text)
|
||
|
||
extracted_fields = {}
|
||
vendor_id = None
|
||
|
||
if template_id and confidence >= 0.5:
|
||
logger.info(f"✅ Matched template {template_id} ({confidence:.0%})")
|
||
extracted_fields = template_service.extract_fields(text, template_id)
|
||
|
||
# Check if this is an invoice2data template (ID -1)
|
||
is_invoice2data = (template_id == -1)
|
||
|
||
if is_invoice2data:
|
||
# Invoice2data doesn't have vendor in cache
|
||
logger.info(f"📋 Using invoice2data template")
|
||
# Try to find vendor from extracted CVR
|
||
if extracted_fields.get('vendor_vat'):
|
||
vendor = execute_query(
|
||
"SELECT id FROM vendors WHERE cvr_number = %s",
|
||
(extracted_fields['vendor_vat'],),
|
||
fetchone=True
|
||
)
|
||
if vendor:
|
||
vendor_id = vendor['id']
|
||
|
||
# Store invoice2data extraction in database
|
||
extraction_id = execute_insert(
|
||
"""INSERT INTO extractions
|
||
(file_id, vendor_matched_id, vendor_name, vendor_cvr,
|
||
document_id, document_date, due_date, document_type, document_type_detected,
|
||
total_amount, currency, confidence, llm_response_json, status)
|
||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
RETURNING extraction_id""",
|
||
(file_id, vendor_id,
|
||
extracted_fields.get('issuer'), # vendor_name
|
||
extracted_fields.get('vendor_vat'), # vendor_cvr
|
||
str(extracted_fields.get('invoice_number')), # document_id
|
||
extracted_fields.get('invoice_date'), # document_date
|
||
extracted_fields.get('due_date'),
|
||
'invoice', # document_type
|
||
'invoice', # document_type_detected
|
||
extracted_fields.get('amount_total'),
|
||
extracted_fields.get('currency', 'DKK'),
|
||
1.0, # invoice2data always 100% confidence
|
||
json.dumps(extracted_fields), # llm_response_json
|
||
'extracted') # status
|
||
)
|
||
|
||
# Insert line items if extracted
|
||
if extracted_fields.get('lines'):
|
||
for idx, line in enumerate(extracted_fields['lines'], start=1):
|
||
execute_insert(
|
||
"""INSERT INTO extraction_lines
|
||
(extraction_id, line_number, description, quantity, unit_price,
|
||
line_total, vat_rate, vat_note, confidence,
|
||
ip_address, contract_number, location_street, location_zip, location_city)
|
||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
RETURNING line_id""",
|
||
(extraction_id, idx, line.get('description'),
|
||
line.get('quantity'), line.get('unit_price'),
|
||
line.get('line_total'), None, None, 1.0,
|
||
line.get('ip_address'), line.get('contract_number'),
|
||
line.get('location_street'), line.get('location_zip'), line.get('location_city'))
|
||
)
|
||
logger.info(f"✅ Saved {len(extracted_fields['lines'])} line items")
|
||
else:
|
||
# Custom template from database
|
||
template = template_service.templates_cache.get(template_id)
|
||
if template:
|
||
vendor_id = template.get('vendor_id')
|
||
|
||
template_service.log_usage(template_id, file_id, True, confidence, extracted_fields)
|
||
|
||
# Update file - use NULL for invoice2data templates to avoid FK constraint
|
||
db_template_id = None if is_invoice2data else template_id
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(db_template_id, file_id)
|
||
)
|
||
else:
|
||
# FALLBACK TO AI EXTRACTION
|
||
logger.info(f"⚠️ Ingen template match (confidence: {confidence:.0%}) - bruger AI extraction")
|
||
|
||
# Use detected vendor from quick analysis if available
|
||
vendor_id = file_record.get('detected_vendor_id')
|
||
|
||
# Call Ollama for full extraction
|
||
logger.info(f"🤖 Calling Ollama for AI extraction...")
|
||
llm_result = await ollama_service.extract_from_text(text)
|
||
|
||
if not llm_result or 'error' in llm_result:
|
||
error_msg = llm_result.get('error') if llm_result else 'AI extraction fejlede'
|
||
logger.error(f"❌ AI extraction failed: {error_msg}")
|
||
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'failed',
|
||
error_message = %s,
|
||
processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(f"AI extraction fejlede: {error_msg}", file_id)
|
||
)
|
||
|
||
raise HTTPException(status_code=500, detail=f"AI extraction fejlede: {error_msg}")
|
||
|
||
extracted_fields = llm_result
|
||
confidence = llm_result.get('confidence', 0.75)
|
||
|
||
# Store AI extracted data in extractions table
|
||
extraction_id = execute_insert(
|
||
"""INSERT INTO supplier_invoice_extractions
|
||
(file_id, vendor_id, invoice_number, invoice_date, due_date,
|
||
total_amount, currency, document_type, confidence, llm_data)
|
||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING extraction_id""",
|
||
(file_id, vendor_id,
|
||
llm_result.get('invoice_number'),
|
||
llm_result.get('invoice_date'),
|
||
llm_result.get('due_date'),
|
||
llm_result.get('total_amount'),
|
||
llm_result.get('currency', 'DKK'),
|
||
llm_result.get('document_type'),
|
||
confidence,
|
||
json.dumps(llm_result))
|
||
)
|
||
|
||
# Insert line items if extracted
|
||
if llm_result.get('lines'):
|
||
for idx, line in enumerate(llm_result['lines'], start=1):
|
||
execute_insert(
|
||
"""INSERT INTO extraction_lines
|
||
(extraction_id, line_number, description, quantity, unit_price,
|
||
line_total, vat_rate, vat_note, confidence)
|
||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
RETURNING line_id""",
|
||
(extraction_id, idx, line.get('description'),
|
||
line.get('quantity'), line.get('unit_price'),
|
||
line.get('line_total'), line.get('vat_rate'),
|
||
line.get('vat_note'), confidence)
|
||
)
|
||
|
||
# Update file status to ai_extracted
|
||
execute_update(
|
||
"""UPDATE incoming_files
|
||
SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP
|
||
WHERE file_id = %s""",
|
||
(file_id,)
|
||
)
|
||
|
||
logger.info(f"✅ AI extraction completed for file {file_id}")
|
||
|
||
# Return success with template data or AI extraction result
|
||
result = {
|
||
"status": "success",
|
||
"file_id": file_id,
|
||
"filename": file_record['filename'],
|
||
"template_matched": template_id is not None,
|
||
"template_id": template_id,
|
||
"vendor_id": vendor_id,
|
||
"confidence": confidence if template_id else llm_result.get('confidence', 0.75),
|
||
"extracted_fields": extracted_fields,
|
||
"pdf_text": text[:1000] if not template_id else text
|
||
}
|
||
|
||
# Add warning if no template exists
|
||
if not template_id and vendor_id:
|
||
vendor = execute_query(
|
||
"SELECT name FROM vendors WHERE id = %s",
|
||
(vendor_id,),
|
||
fetchone=True
|
||
)
|
||
if vendor:
|
||
result["warning"] = f"⚠️ Ingen template fundet for {vendor['name']} - brugte AI extraction (langsommere)"
|
||
|
||
return result
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
raise HTTPException(status_code=500, detail=f"Genbehandling fejlede: {str(e)}")
|
||
|
||
|
||
@router.put("/supplier-invoices/templates/{template_id}")
|
||
async def update_template(
|
||
template_id: int,
|
||
template_name: Optional[str] = None,
|
||
detection_patterns: Optional[List[Dict]] = None,
|
||
field_mappings: Optional[Dict] = None,
|
||
default_product_category: Optional[str] = None,
|
||
is_active: Optional[bool] = None
|
||
):
|
||
"""Opdater eksisterende template"""
|
||
try:
|
||
import json
|
||
|
||
updates = []
|
||
params = []
|
||
|
||
if template_name:
|
||
updates.append("template_name = %s")
|
||
params.append(template_name)
|
||
if detection_patterns is not None:
|
||
updates.append("detection_patterns = %s")
|
||
params.append(json.dumps(detection_patterns))
|
||
if field_mappings is not None:
|
||
updates.append("field_mappings = %s")
|
||
params.append(json.dumps(field_mappings))
|
||
if default_product_category is not None:
|
||
updates.append("default_product_category = %s")
|
||
params.append(default_product_category)
|
||
if is_active is not None:
|
||
updates.append("is_active = %s")
|
||
params.append(is_active)
|
||
|
||
if not updates:
|
||
raise HTTPException(status_code=400, detail="Ingen opdateringer angivet")
|
||
|
||
updates.append("updated_at = CURRENT_TIMESTAMP")
|
||
params.append(template_id)
|
||
|
||
execute_update(
|
||
f"UPDATE supplier_invoice_templates SET {', '.join(updates)} WHERE template_id = %s",
|
||
tuple(params)
|
||
)
|
||
|
||
# Reload templates
|
||
template_service.reload_templates()
|
||
|
||
logger.info(f"✅ Template {template_id} opdateret")
|
||
return {"message": "Template opdateret"}
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Failed to update template: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/templates/invoice2data/{template_name}/test")
|
||
async def test_invoice2data_template(template_name: str, request: Dict):
|
||
"""
|
||
Test invoice2data YAML template mod PDF tekst
|
||
|
||
Request body:
|
||
{
|
||
"pdf_text": "Full PDF text content..."
|
||
}
|
||
|
||
Returns samme format som test_template endpoint
|
||
"""
|
||
try:
|
||
pdf_text = request.get('pdf_text', '')
|
||
if not pdf_text:
|
||
raise HTTPException(status_code=400, detail="pdf_text er påkrævet")
|
||
|
||
# Get invoice2data service
|
||
invoice2data_service = get_invoice2data_service()
|
||
|
||
# Check if template exists
|
||
if template_name not in invoice2data_service.templates:
|
||
raise HTTPException(status_code=404, detail=f"Template '{template_name}' ikke fundet")
|
||
|
||
template_data = invoice2data_service.templates[template_name]
|
||
|
||
# Test extraction
|
||
result = invoice2data_service.extract_with_template(pdf_text, template_name)
|
||
|
||
if not result:
|
||
# Template didn't match
|
||
keywords = template_data.get('keywords', [])
|
||
detection_results = []
|
||
for keyword in keywords:
|
||
found = str(keyword).lower() in pdf_text.lower()
|
||
detection_results.append({
|
||
"pattern": str(keyword),
|
||
"type": "keyword",
|
||
"found": found,
|
||
"weight": 0.5
|
||
})
|
||
|
||
return {
|
||
"matched": False,
|
||
"confidence": 0.0,
|
||
"extracted_fields": {},
|
||
"line_items": [],
|
||
"detection_results": detection_results,
|
||
"template_name": template_name,
|
||
"error": "Template matchede ikke PDF'en"
|
||
}
|
||
|
||
# Extract line items
|
||
line_items = []
|
||
if 'lines' in result:
|
||
for line in result['lines']:
|
||
line_items.append({
|
||
"line_number": line.get('line_number', ''),
|
||
"item_number": line.get('item_number', ''),
|
||
"description": line.get('description_raw', '') or line.get('description', ''),
|
||
"quantity": line.get('quantity', ''),
|
||
"unit_price": line.get('unit_price', ''),
|
||
"line_total": line.get('line_total', ''),
|
||
# Context fields (circuit/location info)
|
||
"circuit_id": line.get('circuit_id', ''),
|
||
"ip_address": line.get('ip_address', ''),
|
||
"contract_number": line.get('contract_number', ''),
|
||
"location_street": line.get('location_street', ''),
|
||
"location_zip": line.get('location_zip', ''),
|
||
"location_city": line.get('location_city', ''),
|
||
})
|
||
|
||
# Build detection results
|
||
keywords = template_data.get('keywords', [])
|
||
detection_results = []
|
||
matched_count = 0
|
||
for keyword in keywords:
|
||
found = str(keyword).lower() in pdf_text.lower()
|
||
if found:
|
||
matched_count += 1
|
||
detection_results.append({
|
||
"pattern": str(keyword),
|
||
"type": "keyword",
|
||
"found": found,
|
||
"weight": 0.5
|
||
})
|
||
|
||
confidence = matched_count / len(keywords) if keywords else 1.0
|
||
|
||
# Remove 'lines' from extracted_fields to avoid duplication
|
||
extracted_fields = {k: v for k, v in result.items() if k != 'lines'}
|
||
|
||
return {
|
||
"matched": True,
|
||
"confidence": confidence,
|
||
"extracted_fields": extracted_fields,
|
||
"line_items": line_items,
|
||
"detection_results": detection_results,
|
||
"template_name": template_name
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Invoice2data template test failed: {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.post("/supplier-invoices/templates/{template_id}/test")
|
||
async def test_template(template_id: int, request: Dict):
|
||
"""
|
||
Test template mod PDF tekst
|
||
|
||
Request body:
|
||
{
|
||
"pdf_text": "Full PDF text content..."
|
||
}
|
||
|
||
Returns:
|
||
{
|
||
"matched": true/false,
|
||
"confidence": 0.85,
|
||
"extracted_fields": {
|
||
"invoice_number": "12345",
|
||
"invoice_date": "01/12-25",
|
||
"total_amount": "1234.56",
|
||
"vendor_cvr": "12345678"
|
||
},
|
||
"detection_results": [
|
||
{"pattern": "BMC Denmark ApS", "found": true, "weight": 0.5}
|
||
]
|
||
}
|
||
"""
|
||
try:
|
||
import re
|
||
import json
|
||
|
||
pdf_text = request.get('pdf_text', '')
|
||
if not pdf_text:
|
||
raise HTTPException(status_code=400, detail="pdf_text er påkrævet")
|
||
|
||
# Fetch template
|
||
query = "SELECT * FROM supplier_invoice_templates WHERE template_id = %s"
|
||
template = execute_query(query, (template_id,))
|
||
if not template:
|
||
raise HTTPException(status_code=404, detail="Template ikke fundet")
|
||
|
||
template = template[0]
|
||
detection_patterns = template.get('detection_patterns', [])
|
||
field_mappings = template.get('field_mappings', {})
|
||
|
||
# Test detection patterns
|
||
total_score = 0.0
|
||
max_score = 0.0
|
||
detection_results = []
|
||
|
||
for pattern in detection_patterns:
|
||
pattern_type = pattern.get('type', 'text')
|
||
pattern_value = pattern.get('pattern', '')
|
||
weight = float(pattern.get('weight', 0.5))
|
||
max_score += weight
|
||
|
||
found = False
|
||
if pattern_type == 'text' and pattern_value in pdf_text:
|
||
found = True
|
||
total_score += weight
|
||
|
||
detection_results.append({
|
||
"pattern": pattern_value,
|
||
"type": pattern_type,
|
||
"found": found,
|
||
"weight": weight
|
||
})
|
||
|
||
confidence = (total_score / max_score) if max_score > 0 else 0.0
|
||
matched = confidence >= 0.7 # Match threshold
|
||
|
||
# Extract fields if matched
|
||
extracted_fields = {}
|
||
if matched:
|
||
for field_name, field_config in field_mappings.items():
|
||
pattern = field_config.get('pattern', '')
|
||
group = field_config.get('group', 1)
|
||
|
||
# Skip non-field patterns (lines_start, lines_end, line_item)
|
||
if field_name in ['lines_start', 'lines_end', 'line_item']:
|
||
continue
|
||
|
||
try:
|
||
match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE)
|
||
if match and len(match.groups()) >= group:
|
||
extracted_fields[field_name] = match.group(group).strip()
|
||
except Exception as e:
|
||
logger.warning(f"Pattern match failed for {field_name}: {e}")
|
||
|
||
# Extract line items if matched
|
||
line_items = []
|
||
if matched:
|
||
# Extract line items using smart extraction
|
||
lines_start = field_mappings.get('lines_start', {}).get('pattern')
|
||
lines_end = field_mappings.get('lines_end', {}).get('pattern')
|
||
line_pattern = field_mappings.get('line_item', {}).get('pattern')
|
||
line_fields = field_mappings.get('line_item', {}).get('fields', [])
|
||
|
||
if line_pattern or lines_start:
|
||
# Extract section between start and end markers
|
||
text_section = pdf_text
|
||
if lines_start:
|
||
try:
|
||
start_match = re.search(lines_start, pdf_text, re.IGNORECASE)
|
||
if start_match:
|
||
text_section = pdf_text[start_match.end():]
|
||
logger.debug(f"Found lines_start at position {start_match.end()}")
|
||
except Exception as e:
|
||
logger.warning(f"Failed to find lines_start: {e}")
|
||
|
||
if lines_end:
|
||
try:
|
||
end_match = re.search(lines_end, text_section, re.IGNORECASE)
|
||
if end_match:
|
||
text_section = text_section[:end_match.start()]
|
||
logger.debug(f"Found lines_end at position {end_match.start()}")
|
||
except Exception as e:
|
||
logger.warning(f"Failed to find lines_end: {e}")
|
||
|
||
# Try pattern first, then smart extraction
|
||
if line_pattern:
|
||
try:
|
||
for match in re.finditer(line_pattern, text_section, re.MULTILINE):
|
||
line_data = {
|
||
'line_number': len(line_items) + 1,
|
||
'raw_text': match.group(0)
|
||
}
|
||
|
||
for idx, field_name in enumerate(line_fields, start=1):
|
||
if idx <= len(match.groups()):
|
||
line_data[field_name] = match.group(idx).strip()
|
||
|
||
line_items.append(line_data)
|
||
except Exception as e:
|
||
logger.error(f"❌ Pattern extraction failed: {e}")
|
||
|
||
# Fallback to smart extraction if no lines found
|
||
if not line_items:
|
||
logger.info("🧠 Trying smart extraction...")
|
||
logger.debug(f"Text section length: {len(text_section)}, first 500 chars: {text_section[:500]}")
|
||
line_items = _smart_extract_lines(text_section)
|
||
logger.info(f"🧠 Smart extraction returned {len(line_items)} items")
|
||
|
||
if line_items:
|
||
logger.info(f"📦 Extracted {len(line_items)} line items from test")
|
||
else:
|
||
logger.warning(f"⚠️ No line items matched. Section length: {len(text_section)} chars")
|
||
logger.debug(f"Section preview: {text_section[:300]}")
|
||
|
||
logger.info(f"🧪 Template {template_id} test: matched={matched}, confidence={confidence:.2f}, lines={len(line_items)}")
|
||
|
||
return {
|
||
"matched": matched,
|
||
"confidence": round(confidence, 2),
|
||
"extracted_fields": extracted_fields,
|
||
"line_items": line_items,
|
||
"detection_results": detection_results,
|
||
"template_name": template.get('template_name', '')
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Template test failed: {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.put("/supplier-invoices/templates/invoice2data/{template_name}/category")
|
||
async def update_yaml_category(template_name: str, request: Dict):
|
||
"""
|
||
Opdater default_product_category i YAML template fil
|
||
|
||
Request body:
|
||
{
|
||
"category": "drift" // varesalg, drift, anlæg, abonnement, lager, udlejning
|
||
}
|
||
"""
|
||
try:
|
||
import yaml
|
||
from pathlib import Path
|
||
|
||
new_category = request.get('category')
|
||
if not new_category:
|
||
raise HTTPException(status_code=400, detail="category er påkrævet")
|
||
|
||
# Validate category
|
||
valid_categories = ['varesalg', 'drift', 'anlæg', 'abonnement', 'lager', 'udlejning']
|
||
if new_category not in valid_categories:
|
||
raise HTTPException(status_code=400, detail=f"Ugyldig kategori. Skal være en af: {', '.join(valid_categories)}")
|
||
|
||
# Find YAML file
|
||
templates_dir = Path(__file__).parent.parent.parent.parent / 'data' / 'invoice_templates'
|
||
yaml_file = templates_dir / f"{template_name}.yml"
|
||
|
||
if not yaml_file.exists():
|
||
raise HTTPException(status_code=404, detail=f"YAML fil ikke fundet: {template_name}.yml")
|
||
|
||
# Load YAML
|
||
with open(yaml_file, 'r', encoding='utf-8') as f:
|
||
template_data = yaml.safe_load(f)
|
||
|
||
# Update category
|
||
template_data['default_product_category'] = new_category
|
||
|
||
# Save YAML with preserved formatting
|
||
with open(yaml_file, 'w', encoding='utf-8') as f:
|
||
yaml.dump(template_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
||
|
||
# Reload invoice2data service to pick up changes
|
||
invoice2data_service = get_invoice2data_service()
|
||
invoice2data_service.__init__() # Reinitialize to reload templates
|
||
|
||
logger.info(f"✅ Updated category for {template_name}.yml to {new_category}")
|
||
|
||
return {
|
||
"message": "Kategori opdateret",
|
||
"template_name": template_name,
|
||
"new_category": new_category
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Failed to update YAML category: {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.get("/supplier-invoices/templates/invoice2data/{template_name}/content")
|
||
async def get_yaml_content(template_name: str):
|
||
"""
|
||
Hent råt YAML indhold fra template fil
|
||
|
||
Returns:
|
||
{
|
||
"content": "issuer: DCS ApS\nkeywords: ..."
|
||
}
|
||
"""
|
||
try:
|
||
from pathlib import Path
|
||
|
||
# Find template file
|
||
template_dir = Path("data/invoice_templates")
|
||
template_file = template_dir / f"{template_name}.yml"
|
||
|
||
if not template_file.exists():
|
||
raise HTTPException(status_code=404, detail=f"Template fil ikke fundet: {template_name}.yml")
|
||
|
||
# Read file content
|
||
content = template_file.read_text(encoding='utf-8')
|
||
|
||
return {
|
||
"template_name": template_name,
|
||
"filename": f"{template_name}.yml",
|
||
"content": content
|
||
}
|
||
|
||
except HTTPException:
|
||
raise
|
||
except Exception as e:
|
||
logger.error(f"❌ Failed to read YAML content: {e}", exc_info=True)
|
||
raise HTTPException(status_code=500, detail=str(e))
|
||
|
||
|
||
@router.delete("/supplier-invoices/templates/{template_id}")
|
||
async def delete_template(template_id: int):
|
||
"""Slet template (soft delete - sæt is_active=false)"""
|
||
try:
|
||
execute_update(
|
||
"UPDATE supplier_invoice_templates SET is_active = false WHERE template_id = %s",
|
||
(template_id,)
|
||
)
|
||
|
||
template_service.reload_templates()
|
||
|
||
logger.info(f"✅ Template {template_id} deaktiveret")
|
||
return {"message": "Template slettet"}
|
||
except Exception as e:
|
||
logger.error(f"❌ Failed to delete template: {e}")
|
||
raise HTTPException(status_code=500, detail=str(e))
|