""" Supplier Invoices Router - Leverandørfakturaer (Kassekladde) Backend API for managing supplier invoices that integrate with e-conomic """ from fastapi import APIRouter, HTTPException, UploadFile, File from typing import List, Dict, Optional from datetime import datetime, date, timedelta from decimal import Decimal from pathlib import Path from app.core.database import execute_query, execute_insert, execute_update from app.core.config import settings from app.services.economic_service import get_economic_service from app.services.ollama_service import ollama_service from app.services.template_service import template_service import logging import os import re logger = logging.getLogger(__name__) router = APIRouter() def _smart_extract_lines(text: str) -> List[Dict]: """ Multi-line extraction for ALSO invoices. Format: 100 48023976 REFURB LENOVO ThinkPad P15 G1 Grde A ...metadata lines... 1ST 3.708,27 3.708,27 Combines data from description line + price line. """ lines_arr = text.split('\n') items = [] i = 0 while i < len(lines_arr): line = lines_arr[i].strip() # Skip empty or header lines if not line or re.search(r'(Position|Varenr|Beskrivelse|Antal|Pris|Total|Model)', line, re.IGNORECASE): i += 1 continue # Pattern 1: ALSO format - "100 48023976 REFURB LENOVO..." item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line) if item_match: position = item_match.group(1) item_number = item_match.group(2) description = item_match.group(3).strip() # Find næste linje med antal+priser quantity = None unit_price = None total_price = None for j in range(i+1, min(i+10, len(lines_arr))): price_line = lines_arr[j].strip() price_match = re.match(r'^(\d+)\s*(?:ST|stk|pc|pcs)\s+([\d.,]+)\s+([\d.,]+)', price_line, re.IGNORECASE) if price_match: quantity = price_match.group(1) unit_price = price_match.group(2).replace(',', '.') total_price = price_match.group(3).replace(',', '.') break if quantity and unit_price: items.append({ 'line_number': len(items) + 1, 'position': position, 'item_number': item_number, 'description': description, 'quantity': quantity, 'unit_price': unit_price, 'total_price': total_price, 'raw_text': f"{line} ... {quantity}ST {unit_price} {total_price}" }) logger.info(f"✅ ALSO: {item_number} - {description[:30]}...") i += 1 continue # Pattern 2: DCS format - "195006Betalingsmetode... 141,2041,20" dcs_match = re.match(r'^(\d{1,2})(\d{4,6})([^0-9]+?)\s+(\d+)([\d,]+)([\d,]+)$', line) if dcs_match: items.append({ 'line_number': len(items) + 1, 'position': dcs_match.group(1), 'item_number': dcs_match.group(2), 'description': dcs_match.group(3).strip(), 'quantity': dcs_match.group(4), 'unit_price': dcs_match.group(5).replace(',', '.'), 'total_price': dcs_match.group(6).replace(',', '.'), 'raw_text': line }) logger.info(f"✅ DCS: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...") i += 1 continue i += 1 if items: logger.info(f"📦 Multi-line extraction found {len(items)} items") else: logger.warning("⚠️ Multi-line extraction found no items") return items # ========== CRUD OPERATIONS ========== @router.get("/supplier-invoices") async def list_supplier_invoices( status: Optional[str] = None, vendor_id: Optional[int] = None, overdue_only: bool = False ): """ List all supplier invoices with filtering options Args: status: Filter by status (pending, approved, sent_to_economic, paid, overdue, cancelled) vendor_id: Filter by vendor overdue_only: Only show overdue unpaid invoices """ try: query = """ SELECT si.*, v.name as vendor_full_name, v.economic_supplier_number as vendor_economic_id, CASE WHEN si.paid_date IS NOT NULL THEN 'paid' WHEN si.due_date < CURRENT_DATE AND si.paid_date IS NULL THEN 'overdue' ELSE si.status END as computed_status FROM supplier_invoices si LEFT JOIN vendors v ON si.vendor_id = v.id WHERE 1=1 """ params = [] if status: query += " AND si.status = %s" params.append(status) if vendor_id: query += " AND si.vendor_id = %s" params.append(vendor_id) if overdue_only: query += " AND si.due_date < CURRENT_DATE AND si.paid_date IS NULL" query += " ORDER BY si.due_date ASC, si.invoice_date DESC" invoices = execute_query(query, tuple(params) if params else ()) # Add lines to each invoice for invoice in invoices: lines = execute_query( "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number", (invoice['id'],) ) invoice['lines'] = lines return invoices except Exception as e: logger.error(f"❌ Failed to list supplier invoices: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("/supplier-invoices/pending-files") async def get_pending_files(): """Hent liste over filer der venter på behandling""" try: files = execute_query( """SELECT file_id, filename, status, uploaded_at, error_message, template_id FROM incoming_files WHERE status IN ('pending', 'processing', 'failed') ORDER BY uploaded_at DESC""" ) return {"files": files if files else [], "count": len(files) if files else 0} except Exception as e: logger.error(f"❌ Failed to get pending files: {e}") raise HTTPException(status_code=500, detail=str(e)) # ========== TEMPLATE MANAGEMENT (must be before {invoice_id} route) ========== @router.get("/supplier-invoices/templates") async def list_templates(): """Hent alle templates""" try: query = """ SELECT t.*, v.name as vendor_name FROM supplier_invoice_templates t LEFT JOIN vendors v ON t.vendor_id = v.id WHERE t.is_active = true ORDER BY t.created_at DESC """ templates = execute_query(query) return templates if templates else [] except Exception as e: logger.error(f"❌ Failed to list templates: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("/supplier-invoices/{invoice_id}") async def get_supplier_invoice(invoice_id: int): """Get single supplier invoice with lines""" try: invoice = execute_query( """SELECT si.*, v.name as vendor_full_name, v.economic_supplier_number as vendor_economic_id FROM supplier_invoices si LEFT JOIN vendors v ON si.vendor_id = v.id WHERE si.id = %s""", (invoice_id,), fetchone=True ) if not invoice: raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") # Get lines lines = execute_query( "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number", (invoice_id,) ) invoice['lines'] = lines return invoice except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to get supplier invoice {invoice_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/supplier-invoices") async def create_supplier_invoice(data: Dict): """ Create new supplier invoice Required fields: - invoice_number: str - vendor_id: int - invoice_date: str (YYYY-MM-DD) - total_amount: float Optional fields: - due_date: str (YYYY-MM-DD) - defaults to invoice_date + 30 days - vat_amount: float - net_amount: float - currency: str (default 'DKK') - description: str - notes: str - lines: List[Dict] with line items """ try: # Validate required fields required = ['invoice_number', 'vendor_id', 'invoice_date', 'total_amount'] missing = [f for f in required if f not in data] if missing: raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}") # Calculate due_date if not provided (30 days default) invoice_date = datetime.fromisoformat(data['invoice_date']) due_date = data.get('due_date') if not due_date: due_date = (invoice_date + timedelta(days=30)).strftime('%Y-%m-%d') # Insert supplier invoice invoice_id = execute_insert( """INSERT INTO supplier_invoices (invoice_number, vendor_id, vendor_name, invoice_date, due_date, total_amount, vat_amount, net_amount, currency, description, notes, status, created_by) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'pending', %s)""", ( data['invoice_number'], data['vendor_id'], data.get('vendor_name'), data['invoice_date'], due_date, data['total_amount'], data.get('vat_amount', 0), data.get('net_amount', data['total_amount']), data.get('currency', 'DKK'), data.get('description'), data.get('notes'), data.get('created_by') ) ) # Insert lines if provided if data.get('lines'): for idx, line in enumerate(data['lines'], start=1): execute_insert( """INSERT INTO supplier_invoice_lines (supplier_invoice_id, line_number, description, quantity, unit_price, line_total, vat_code, vat_rate, vat_amount, contra_account, sku) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", ( invoice_id, line.get('line_number', idx), line.get('description'), line.get('quantity', 1), line.get('unit_price', 0), line.get('line_total', 0), line.get('vat_code', 'I25'), line.get('vat_rate', 25.00), line.get('vat_amount', 0), line.get('contra_account', '5810'), line.get('sku') ) ) logger.info(f"✅ Created supplier invoice: {data['invoice_number']} (ID: {invoice_id})") return { "success": True, "invoice_id": invoice_id, "invoice_number": data['invoice_number'], "due_date": due_date } except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to create supplier invoice: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.put("/supplier-invoices/{invoice_id}") async def update_supplier_invoice(invoice_id: int, data: Dict): """Update supplier invoice details""" try: # Check if invoice exists existing = execute_query( "SELECT id, status FROM supplier_invoices WHERE id = %s", (invoice_id,), fetchone=True ) if not existing: raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") # Don't allow editing if already sent to e-conomic if existing['status'] == 'sent_to_economic': raise HTTPException( status_code=400, detail="Cannot edit invoice that has been sent to e-conomic" ) # Build update query dynamically based on provided fields update_fields = [] params = [] allowed_fields = ['invoice_number', 'vendor_id', 'vendor_name', 'invoice_date', 'due_date', 'total_amount', 'vat_amount', 'net_amount', 'currency', 'description', 'notes', 'status'] for field in allowed_fields: if field in data: update_fields.append(f"{field} = %s") params.append(data[field]) if not update_fields: raise HTTPException(status_code=400, detail="No fields to update") params.append(invoice_id) query = f""" UPDATE supplier_invoices SET {', '.join(update_fields)}, updated_at = CURRENT_TIMESTAMP WHERE id = %s """ execute_update(query, tuple(params)) logger.info(f"✅ Updated supplier invoice {invoice_id}") return {"success": True, "invoice_id": invoice_id} except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to update supplier invoice {invoice_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.delete("/supplier-invoices/{invoice_id}") async def delete_supplier_invoice(invoice_id: int): """Delete supplier invoice (soft delete if integrated with e-conomic)""" try: invoice = execute_query( "SELECT id, invoice_number, economic_voucher_number FROM supplier_invoices WHERE id = %s", (invoice_id,), fetchone=True ) if not invoice: raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") # If sent to e-conomic, only mark as cancelled (don't delete) if invoice.get('economic_voucher_number'): execute_update( "UPDATE supplier_invoices SET status = 'cancelled', updated_at = CURRENT_TIMESTAMP WHERE id = %s", (invoice_id,) ) logger.info(f"⚠️ Marked supplier invoice {invoice['invoice_number']} as cancelled (sent to e-conomic)") return {"success": True, "message": "Invoice marked as cancelled", "invoice_id": invoice_id} # Otherwise, delete invoice and lines execute_update("DELETE FROM supplier_invoice_lines WHERE supplier_invoice_id = %s", (invoice_id,)) execute_update("DELETE FROM supplier_invoices WHERE id = %s", (invoice_id,)) logger.info(f"🗑️ Deleted supplier invoice {invoice['invoice_number']} (ID: {invoice_id})") return {"success": True, "message": "Invoice deleted", "invoice_id": invoice_id} except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to delete supplier invoice {invoice_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) # ========== E-CONOMIC INTEGRATION ========== @router.post("/supplier-invoices/{invoice_id}/approve") async def approve_supplier_invoice(invoice_id: int, approved_by: str): """Approve supplier invoice for payment""" try: invoice = execute_query( "SELECT id, invoice_number, status FROM supplier_invoices WHERE id = %s", (invoice_id,), fetchone=True ) if not invoice: raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") if invoice['status'] != 'pending': raise HTTPException(status_code=400, detail=f"Invoice is already {invoice['status']}") execute_update( """UPDATE supplier_invoices SET status = 'approved', approved_by = %s, approved_at = CURRENT_TIMESTAMP WHERE id = %s""", (approved_by, invoice_id) ) logger.info(f"✅ Approved supplier invoice {invoice['invoice_number']} by {approved_by}") return {"success": True, "invoice_id": invoice_id, "approved_by": approved_by} except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to approve invoice {invoice_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/supplier-invoices/{invoice_id}/send-to-economic") async def send_to_economic(invoice_id: int): """ Send approved supplier invoice to e-conomic kassekladde Creates voucher entry in e-conomic journals """ try: # Get invoice with lines invoice = execute_query( """SELECT si.*, v.economic_supplier_number as vendor_economic_id, v.name as vendor_full_name FROM supplier_invoices si LEFT JOIN vendors v ON si.vendor_id = v.id WHERE si.id = %s""", (invoice_id,), fetchone=True ) if not invoice: raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") if invoice['status'] != 'approved': raise HTTPException(status_code=400, detail="Invoice must be approved before sending to e-conomic") if invoice.get('economic_voucher_number'): raise HTTPException(status_code=400, detail="Invoice already sent to e-conomic") # Get lines lines = execute_query( "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number", (invoice_id,) ) if not lines: raise HTTPException(status_code=400, detail="Invoice must have at least one line item") # Check if vendor exists in e-conomic economic = get_economic_service() vendor_economic_id = invoice.get('vendor_economic_id') # If vendor not in e-conomic, create it if not vendor_economic_id: vendor_result = await economic.search_supplier_by_name(invoice.get('vendor_full_name') or invoice.get('vendor_name')) if vendor_result: vendor_economic_id = vendor_result['supplierNumber'] # Update local vendor record execute_update( "UPDATE vendors SET economic_supplier_number = %s WHERE id = %s", (vendor_economic_id, invoice['vendor_id']) ) else: # Create new supplier in e-conomic new_supplier = await economic.create_supplier({ 'name': invoice.get('vendor_full_name') or invoice.get('vendor_name'), 'currency': invoice.get('currency', 'DKK') }) if new_supplier and new_supplier.get('supplierNumber'): vendor_economic_id = new_supplier['supplierNumber'] else: raise HTTPException(status_code=500, detail="Failed to create supplier in e-conomic") # Get default journal number from settings journal_setting = execute_query( "SELECT setting_value FROM supplier_invoice_settings WHERE setting_key = 'economic_default_journal'", fetchone=True ) journal_number = int(journal_setting['setting_value']) if journal_setting else 1 # Build VAT breakdown from lines vat_breakdown = {} line_items = [] for line in lines: vat_code = line.get('vat_code', 'I25') if vat_code not in vat_breakdown: vat_breakdown[vat_code] = { 'net': 0, 'vat': 0, 'gross': 0, 'rate': line.get('vat_rate', 25.00) } line_total = float(line.get('line_total', 0)) vat_amount = float(line.get('vat_amount', 0)) net_amount = line_total - vat_amount vat_breakdown[vat_code]['net'] += net_amount vat_breakdown[vat_code]['vat'] += vat_amount vat_breakdown[vat_code]['gross'] += line_total line_items.append({ 'description': line.get('description'), 'quantity': float(line.get('quantity', 1)), 'unit_price': float(line.get('unit_price', 0)), 'line_total': line_total, 'vat_code': vat_code, 'vat_amount': vat_amount, 'contra_account': line.get('contra_account', '5810'), 'sku': line.get('sku') }) # Send to e-conomic result = await economic.create_journal_supplier_invoice( journal_number=journal_number, supplier_number=vendor_economic_id, invoice_number=invoice['invoice_number'], invoice_date=invoice['invoice_date'].isoformat() if isinstance(invoice['invoice_date'], date) else invoice['invoice_date'], total_amount=float(invoice['total_amount']), vat_breakdown=vat_breakdown, line_items=line_items, due_date=invoice['due_date'].isoformat() if invoice.get('due_date') and isinstance(invoice['due_date'], date) else invoice.get('due_date'), text=invoice.get('description') or f"Supplier invoice {invoice['invoice_number']}" ) if result.get('error'): raise HTTPException(status_code=500, detail=result.get('message', 'Failed to create voucher in e-conomic')) # Update invoice with e-conomic details execute_update( """UPDATE supplier_invoices SET status = 'sent_to_economic', economic_supplier_number = %s, economic_journal_number = %s, economic_voucher_number = %s, economic_accounting_year = %s, sent_to_economic_at = CURRENT_TIMESTAMP WHERE id = %s""", ( vendor_economic_id, result['journal_number'], result['voucher_number'], result['accounting_year'], invoice_id ) ) # Upload attachment if file_path exists if invoice.get('file_path') and os.path.exists(invoice['file_path']): attachment_result = await economic.upload_voucher_attachment( journal_number=result['journal_number'], accounting_year=result['accounting_year'], voucher_number=result['voucher_number'], pdf_path=invoice['file_path'], filename=f"{invoice['invoice_number']}.pdf" ) if attachment_result.get('success'): logger.info(f"📎 Uploaded attachment for voucher {result['voucher_number']}") logger.info(f"✅ Sent supplier invoice {invoice['invoice_number']} to e-conomic (voucher #{result['voucher_number']})") return { "success": True, "invoice_id": invoice_id, "voucher_number": result['voucher_number'], "journal_number": result['journal_number'], "accounting_year": result['accounting_year'] } except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to send invoice {invoice_id} to e-conomic: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("/supplier-invoices/economic/journals") async def get_economic_journals(): """Get available e-conomic journals (kassekladder)""" try: economic = get_economic_service() journals = await economic.get_supplier_invoice_journals() return {"journals": journals} except Exception as e: logger.error(f"❌ Failed to get e-conomic journals: {e}") raise HTTPException(status_code=500, detail=str(e)) # ========== STATISTICS & REPORTS ========== @router.get("/supplier-invoices/stats/overview") async def get_payment_overview(): """ Get overview of supplier invoices payment status Returns stats for total, paid, overdue, due soon, and pending invoices """ try: today = date.today().isoformat() stats = execute_query(""" SELECT COUNT(*) as total_count, SUM(CASE WHEN paid_date IS NOT NULL THEN 1 ELSE 0 END) as paid_count, SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN 1 ELSE 0 END) as overdue_count, SUM(CASE WHEN paid_date IS NULL AND due_date >= %s AND due_date <= (%s::date + INTERVAL '7 days') THEN 1 ELSE 0 END) as due_soon_count, SUM(CASE WHEN paid_date IS NULL AND (due_date IS NULL OR due_date > (%s::date + INTERVAL '7 days')) THEN 1 ELSE 0 END) as pending_count, SUM(total_amount) as total_amount, SUM(CASE WHEN paid_date IS NOT NULL THEN total_amount ELSE 0 END) as paid_amount, SUM(CASE WHEN paid_date IS NULL THEN total_amount ELSE 0 END) as unpaid_amount, SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN total_amount ELSE 0 END) as overdue_amount FROM supplier_invoices WHERE status != 'cancelled' """, (today, today, today, today, today), fetchone=True) return { "total_invoices": stats.get('total_count', 0) if stats else 0, "paid_count": stats.get('paid_count', 0) if stats else 0, "overdue_count": stats.get('overdue_count', 0) if stats else 0, "due_soon_count": stats.get('due_soon_count', 0) if stats else 0, "pending_count": stats.get('pending_count', 0) if stats else 0, "total_amount": float(stats.get('total_amount', 0) or 0) if stats else 0, "paid_amount": float(stats.get('paid_amount', 0) or 0) if stats else 0, "unpaid_amount": float(stats.get('unpaid_amount', 0) or 0) if stats else 0, "overdue_amount": float(stats.get('overdue_amount', 0) or 0) if stats else 0 } except Exception as e: logger.error(f"❌ Failed to get payment overview: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("/supplier-invoices/stats/by-vendor") async def get_stats_by_vendor(): """Get supplier invoice statistics grouped by vendor""" try: stats = execute_query(""" SELECT v.id as vendor_id, v.name as vendor_name, COUNT(si.id) as invoice_count, SUM(si.total_amount) as total_amount, SUM(CASE WHEN si.paid_date IS NULL THEN si.total_amount ELSE 0 END) as unpaid_amount, MAX(si.due_date) as latest_due_date FROM vendors v LEFT JOIN supplier_invoices si ON v.id = si.vendor_id WHERE si.status != 'cancelled' OR si.status IS NULL GROUP BY v.id, v.name HAVING COUNT(si.id) > 0 ORDER BY unpaid_amount DESC """) return {"vendor_stats": stats} except Exception as e: logger.error(f"❌ Failed to get vendor stats: {e}") raise HTTPException(status_code=500, detail=str(e)) # ========== UPLOAD & AI EXTRACTION ========== @router.post("/supplier-invoices/upload") async def upload_supplier_invoice(file: UploadFile = File(...)): """ Upload supplier invoice (PDF/image) and extract data using templates Process: 1. Validate file type and size 2. Calculate SHA256 checksum for duplicate detection 3. Save file to uploads directory 4. Extract text (PDF/OCR) 5. Match template based on PDF content 6. Extract fields using template regex patterns 7. Show form with pre-filled data for user review Returns: { "status": "success|duplicate|needs_review", "file_id": int, "template_matched": bool, "template_id": int, "extracted_fields": dict, "confidence": float, "pdf_text": str # For manual review } """ try: # Validate file extension suffix = Path(file.filename).suffix.lower() if suffix not in settings.ALLOWED_EXTENSIONS: raise HTTPException( status_code=400, detail=f"Filtype {suffix} ikke tilladt. Tilladte: {', '.join(settings.ALLOWED_EXTENSIONS)}" ) # Create upload directory upload_dir = Path(settings.UPLOAD_DIR) upload_dir.mkdir(parents=True, exist_ok=True) # Save file temporarily to calculate checksum temp_path = upload_dir / f"temp_{datetime.now().timestamp()}_{file.filename}" try: # Validate file size while saving max_size = settings.MAX_FILE_SIZE_MB * 1024 * 1024 total_size = 0 with open(temp_path, "wb") as buffer: while chunk := await file.read(8192): total_size += len(chunk) if total_size > max_size: temp_path.unlink(missing_ok=True) raise HTTPException( status_code=413, detail=f"Fil for stor (max {settings.MAX_FILE_SIZE_MB}MB)" ) buffer.write(chunk) logger.info(f"📥 Uploaded file: {file.filename} ({total_size} bytes)") # Calculate SHA256 checksum checksum = ollama_service.calculate_file_checksum(temp_path) # Check for duplicate file existing_file = execute_query( "SELECT file_id, status FROM incoming_files WHERE checksum = %s", (checksum,), fetchone=True ) if existing_file: temp_path.unlink(missing_ok=True) logger.warning(f"⚠️ Duplicate file detected: {checksum[:16]}...") # Get existing invoice if linked existing_invoice = execute_query( """SELECT si.* FROM supplier_invoices si JOIN extractions e ON si.extraction_id = e.extraction_id WHERE e.file_id = %s""", (existing_file['file_id'],), fetchone=True ) return { "status": "duplicate", "message": "Denne fil er allerede uploadet", "file_id": existing_file['file_id'], "invoice_id": existing_invoice['id'] if existing_invoice else None } # Rename to permanent name final_path = upload_dir / file.filename counter = 1 while final_path.exists(): final_path = upload_dir / f"{final_path.stem}_{counter}{final_path.suffix}" counter += 1 temp_path.rename(final_path) logger.info(f"💾 Saved file as: {final_path.name}") # Insert file record file_record = execute_query( """INSERT INTO incoming_files (filename, original_filename, file_path, file_size, mime_type, checksum, status) VALUES (%s, %s, %s, %s, %s, %s, 'processing') RETURNING file_id""", (final_path.name, file.filename, str(final_path), total_size, ollama_service._get_mime_type(final_path), checksum), fetchone=True ) file_id = file_record['file_id'] # Extract text from file logger.info(f"📄 Extracting text from {final_path.suffix}...") text = await ollama_service._extract_text_from_file(final_path) # Try template matching logger.info(f"📋 Matching template...") template_id, confidence = template_service.match_template(text) extracted_fields = {} vendor_id = None if template_id and confidence >= 0.5: # Extract fields using template logger.info(f"✅ Using template {template_id} ({confidence:.0%} confidence)") extracted_fields = template_service.extract_fields(text, template_id) # Get vendor from template template = template_service.templates_cache.get(template_id) if template: vendor_id = template.get('vendor_id') # Log usage template_service.log_usage(template_id, file_id, True, confidence, extracted_fields) # Update file record execute_update( """UPDATE incoming_files SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP WHERE file_id = %s""", (template_id, file_id) ) else: logger.info("ℹ️ No template matched - manual entry required") execute_update( """UPDATE incoming_files SET status = 'pending', processed_at = CURRENT_TIMESTAMP WHERE file_id = %s""", (file_id,) ) # Return data for user to review and confirm return { "status": "needs_review", "file_id": file_id, "template_matched": template_id is not None, "template_id": template_id, "vendor_id": vendor_id, "confidence": confidence, "extracted_fields": extracted_fields, "pdf_text": text[:500], # First 500 chars for reference "message": "Upload gennemført - gennemgå og bekræft data" } except HTTPException: raise except Exception as e: logger.error(f"❌ Upload failed (inner): {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}") except HTTPException: raise except Exception as e: logger.error(f"❌ Upload failed (outer): {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}") # ========== ECONOMIC SYNC ========== @router.post("/supplier-invoices/{invoice_id}/send-to-economic") async def send_invoice_to_economic(invoice_id: int): """Send supplier invoice to e-conomic - requires separate implementation""" raise HTTPException(status_code=501, detail="e-conomic integration kommer senere") @router.post("/supplier-invoices/reprocess/{file_id}") async def reprocess_uploaded_file(file_id: int): """ Genbehandl en uploadet fil med template matching Bruges til at behandle filer der fejlede eller ikke blev færdigbehandlet """ try: # Get file record file_record = execute_query( "SELECT * FROM incoming_files WHERE file_id = %s", (file_id,), fetchone=True ) if not file_record: raise HTTPException(status_code=404, detail=f"Fil {file_id} ikke fundet") file_path = Path(file_record['file_path']) if not file_path.exists(): raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}") logger.info(f"�� Genbehandler fil {file_id}: {file_record['filename']}") # Extract text from file text = await ollama_service._extract_text_from_file(file_path) # Try template matching template_id, confidence = template_service.match_template(text) extracted_fields = {} vendor_id = None if template_id and confidence >= 0.5: logger.info(f"✅ Matched template {template_id} ({confidence:.0%})") extracted_fields = template_service.extract_fields(text, template_id) template = template_service.templates_cache.get(template_id) if template: vendor_id = template.get('vendor_id') template_service.log_usage(template_id, file_id, True, confidence, extracted_fields) execute_update( """UPDATE incoming_files SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP WHERE file_id = %s""", (template_id, file_id) ) else: logger.info("ℹ️ Ingen template match") execute_update( """UPDATE incoming_files SET status = 'pending', processed_at = CURRENT_TIMESTAMP WHERE file_id = %s""", (file_id,) ) return { "status": "success", "file_id": file_id, "filename": file_record['filename'], "template_matched": template_id is not None, "template_id": template_id, "vendor_id": vendor_id, "confidence": confidence, "extracted_fields": extracted_fields, "pdf_text": text # Return full text for template builder } except HTTPException: raise except Exception as e: logger.error(f"❌ Reprocess failed for file {file_id}: {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Genbehandling fejlede: {str(e)}") # ========== TEMPLATE MANAGEMENT ========== @router.post("/supplier-invoices/ai-analyze") async def ai_analyze_invoice(request: Dict): """Brug AI til at analysere faktura og foreslå template felter""" try: pdf_text = request.get('pdf_text', '') vendor_id = request.get('vendor_id') if not pdf_text: raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet") # Build enhanced PDF text with instruction enhanced_text = f"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering. RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON! REQUIRED STRUKTUR (alle felter skal med): {{ "invoice_number": "5082481", "invoice_date": "24/10-25", "total_amount": "1471.20", "cvr": "29522790", "detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"], "lines_start": "Nr.VarenrTekst", "lines_end": "Subtotal" }} FIND FØLGENDE: 1. invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice") 2. invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY) 3. total_amount: Total beløb - Søg efter "Total", "I alt", "Totalbeløb" - Hvis beløbet er på næste linje, match sidste tal - Format: [\d.,]+ (f.eks. 1.471,20 eller 1471.20) 4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK") 5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren - Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S") - Website eller email (f.eks. "WWW.DCS.DK") - Adresse element (f.eks. "Høgemosevænget", "Mårkærvej") - UNDGÅ generiske ord som "Faktura", "Danmark", "Side" 6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr") 7. lines_end: Tekst EFTER varelinjer (f.eks. "Subtotal", "I alt", "Side 1 af") VIGTIGT: - detection_patterns SKAL være mindst 3 specifikke tekststrenge - Vælg tekststrenge der er UNIKKE for denne leverandør - LAV IKKE patterns eller line_item - kun udtræk data PDF TEKST: {pdf_text[:2000]} RETURNER KUN JSON - intet andet!""" # Call Ollama logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst") result = await ollama_service.extract_from_text(enhanced_text) if not result: raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen") logger.info(f"✅ AI analyse gennemført: {result}") return result except Exception as e: logger.error(f"❌ AI analyse fejlede: {e}") raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}") @router.post("/supplier-invoices/templates") async def create_template(request: Dict): """ Opret ny template Request body: { "vendor_id": 1, "template_name": "Test Template", "detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}], "field_mappings": {"invoice_number": {"pattern": "Nummer\\s*(\\d+)", "group": 1}} } """ try: import json vendor_id = request.get('vendor_id') template_name = request.get('template_name') detection_patterns = request.get('detection_patterns', []) field_mappings = request.get('field_mappings', {}) if not vendor_id or not template_name: raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet") # Insert template and get template_id query = """ INSERT INTO supplier_invoice_templates (vendor_id, template_name, detection_patterns, field_mappings) VALUES (%s, %s, %s, %s) RETURNING template_id """ result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings))) template_id = result[0]['template_id'] if result else None if not template_id: raise HTTPException(status_code=500, detail="Kunne ikke oprette template") # Reload templates in cache template_service.reload_templates() logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}") return {"template_id": template_id, "message": "Template oprettet"} except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to create template: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) @router.put("/supplier-invoices/templates/{template_id}") async def update_template( template_id: int, template_name: Optional[str] = None, detection_patterns: Optional[List[Dict]] = None, field_mappings: Optional[Dict] = None, is_active: Optional[bool] = None ): """Opdater eksisterende template""" try: import json updates = [] params = [] if template_name: updates.append("template_name = %s") params.append(template_name) if detection_patterns is not None: updates.append("detection_patterns = %s") params.append(json.dumps(detection_patterns)) if field_mappings is not None: updates.append("field_mappings = %s") params.append(json.dumps(field_mappings)) if is_active is not None: updates.append("is_active = %s") params.append(is_active) if not updates: raise HTTPException(status_code=400, detail="Ingen opdateringer angivet") updates.append("updated_at = CURRENT_TIMESTAMP") params.append(template_id) execute_update( f"UPDATE supplier_invoice_templates SET {', '.join(updates)} WHERE template_id = %s", tuple(params) ) # Reload templates template_service.reload_templates() logger.info(f"✅ Template {template_id} opdateret") return {"message": "Template opdateret"} except HTTPException: raise except Exception as e: logger.error(f"❌ Failed to update template: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.post("/supplier-invoices/templates/{template_id}/test") async def test_template(template_id: int, request: Dict): """ Test template mod PDF tekst Request body: { "pdf_text": "Full PDF text content..." } Returns: { "matched": true/false, "confidence": 0.85, "extracted_fields": { "invoice_number": "12345", "invoice_date": "01/12-25", "total_amount": "1234.56", "vendor_cvr": "12345678" }, "detection_results": [ {"pattern": "BMC Denmark ApS", "found": true, "weight": 0.5} ] } """ try: import re import json pdf_text = request.get('pdf_text', '') if not pdf_text: raise HTTPException(status_code=400, detail="pdf_text er påkrævet") # Fetch template query = "SELECT * FROM supplier_invoice_templates WHERE template_id = %s" template = execute_query(query, (template_id,)) if not template: raise HTTPException(status_code=404, detail="Template ikke fundet") template = template[0] detection_patterns = template.get('detection_patterns', []) field_mappings = template.get('field_mappings', {}) # Test detection patterns total_score = 0.0 max_score = 0.0 detection_results = [] for pattern in detection_patterns: pattern_type = pattern.get('type', 'text') pattern_value = pattern.get('pattern', '') weight = float(pattern.get('weight', 0.5)) max_score += weight found = False if pattern_type == 'text' and pattern_value in pdf_text: found = True total_score += weight detection_results.append({ "pattern": pattern_value, "type": pattern_type, "found": found, "weight": weight }) confidence = (total_score / max_score) if max_score > 0 else 0.0 matched = confidence >= 0.7 # Match threshold # Extract fields if matched extracted_fields = {} if matched: for field_name, field_config in field_mappings.items(): pattern = field_config.get('pattern', '') group = field_config.get('group', 1) # Skip non-field patterns (lines_start, lines_end, line_item) if field_name in ['lines_start', 'lines_end', 'line_item']: continue try: match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE) if match and len(match.groups()) >= group: extracted_fields[field_name] = match.group(group).strip() except Exception as e: logger.warning(f"Pattern match failed for {field_name}: {e}") # Extract line items if matched line_items = [] if matched: # Extract line items using smart extraction lines_start = field_mappings.get('lines_start', {}).get('pattern') lines_end = field_mappings.get('lines_end', {}).get('pattern') line_pattern = field_mappings.get('line_item', {}).get('pattern') line_fields = field_mappings.get('line_item', {}).get('fields', []) if line_pattern or lines_start: # Extract section between start and end markers text_section = pdf_text if lines_start: try: start_match = re.search(lines_start, pdf_text, re.IGNORECASE) if start_match: text_section = pdf_text[start_match.end():] logger.debug(f"Found lines_start at position {start_match.end()}") except Exception as e: logger.warning(f"Failed to find lines_start: {e}") if lines_end: try: end_match = re.search(lines_end, text_section, re.IGNORECASE) if end_match: text_section = text_section[:end_match.start()] logger.debug(f"Found lines_end at position {end_match.start()}") except Exception as e: logger.warning(f"Failed to find lines_end: {e}") # Try pattern first, then smart extraction if line_pattern: try: for match in re.finditer(line_pattern, text_section, re.MULTILINE): line_data = { 'line_number': len(line_items) + 1, 'raw_text': match.group(0) } for idx, field_name in enumerate(line_fields, start=1): if idx <= len(match.groups()): line_data[field_name] = match.group(idx).strip() line_items.append(line_data) except Exception as e: logger.error(f"❌ Pattern extraction failed: {e}") # Fallback to smart extraction if no lines found if not line_items: logger.info("🧠 Trying smart extraction...") logger.debug(f"Text section length: {len(text_section)}, first 500 chars: {text_section[:500]}") line_items = _smart_extract_lines(text_section) logger.info(f"🧠 Smart extraction returned {len(line_items)} items") if line_items: logger.info(f"📦 Extracted {len(line_items)} line items from test") else: logger.warning(f"⚠️ No line items matched. Section length: {len(text_section)} chars") logger.debug(f"Section preview: {text_section[:300]}") logger.info(f"🧪 Template {template_id} test: matched={matched}, confidence={confidence:.2f}, lines={len(line_items)}") return { "matched": matched, "confidence": round(confidence, 2), "extracted_fields": extracted_fields, "line_items": line_items, "detection_results": detection_results, "template_name": template.get('template_name', '') } except HTTPException: raise except Exception as e: logger.error(f"❌ Template test failed: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) @router.delete("/supplier-invoices/templates/{template_id}") async def delete_template(template_id: int): """Slet template (soft delete - sæt is_active=false)""" try: execute_update( "UPDATE supplier_invoice_templates SET is_active = false WHERE template_id = %s", (template_id,) ) template_service.reload_templates() logger.info(f"✅ Template {template_id} deaktiveret") return {"message": "Template slettet"} except Exception as e: logger.error(f"❌ Failed to delete template: {e}") raise HTTPException(status_code=500, detail=str(e))