diff --git a/Dockerfile b/Dockerfile index c505fdb..fac1eb7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,9 @@ RUN apt-get update && apt-get install -y \ git \ libpq-dev \ gcc \ + tesseract-ocr \ + tesseract-ocr-dan \ + tesseract-ocr-eng \ && rm -rf /var/lib/apt/lists/* # Build arguments for GitHub release deployment diff --git a/app/billing/backend/router.py b/app/billing/backend/router.py index 17e52d1..3d72479 100644 --- a/app/billing/backend/router.py +++ b/app/billing/backend/router.py @@ -4,9 +4,13 @@ API endpoints for billing operations """ from fastapi import APIRouter +from . import supplier_invoices router = APIRouter() +# Include supplier invoices router +router.include_router(supplier_invoices.router, prefix="", tags=["Supplier Invoices"]) + @router.get("/billing/invoices") async def list_invoices(): diff --git a/app/billing/backend/supplier_invoices.py b/app/billing/backend/supplier_invoices.py new file mode 100644 index 0000000..17ccbb6 --- /dev/null +++ b/app/billing/backend/supplier_invoices.py @@ -0,0 +1,1333 @@ +""" +Supplier Invoices Router - Leverandørfakturaer (Kassekladde) +Backend API for managing supplier invoices that integrate with e-conomic +""" + +from fastapi import APIRouter, HTTPException, UploadFile, File +from typing import List, Dict, Optional +from datetime import datetime, date, timedelta +from decimal import Decimal +from pathlib import Path +from app.core.database import execute_query, execute_insert, execute_update +from app.core.config import settings +from app.services.economic_service import get_economic_service +from app.services.ollama_service import ollama_service +from app.services.template_service import template_service +import logging +import os +import re + +logger = logging.getLogger(__name__) +router = APIRouter() + + +def _smart_extract_lines(text: str) -> List[Dict]: + """ + Multi-line extraction for ALSO invoices. + + Format: + 100 48023976 REFURB LENOVO ThinkPad P15 G1 Grde A + ...metadata lines... 
+ 1ST 3.708,27 3.708,27 + + Combines data from description line + price line. + """ + lines_arr = text.split('\n') + items = [] + i = 0 + + while i < len(lines_arr): + line = lines_arr[i].strip() + + # Skip empty or header lines + if not line or re.search(r'(Position|Varenr|Beskrivelse|Antal|Pris|Total|Model)', line, re.IGNORECASE): + i += 1 + continue + + # Pattern 1: ALSO format - "100 48023976 REFURB LENOVO..." + item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line) + if item_match: + position = item_match.group(1) + item_number = item_match.group(2) + description = item_match.group(3).strip() + + # Find næste linje med antal+priser + quantity = None + unit_price = None + total_price = None + + for j in range(i+1, min(i+10, len(lines_arr))): + price_line = lines_arr[j].strip() + price_match = re.match(r'^(\d+)\s*(?:ST|stk|pc|pcs)\s+([\d.,]+)\s+([\d.,]+)', price_line, re.IGNORECASE) + if price_match: + quantity = price_match.group(1) + unit_price = price_match.group(2).replace(',', '.') + total_price = price_match.group(3).replace(',', '.') + break + + if quantity and unit_price: + items.append({ + 'line_number': len(items) + 1, + 'position': position, + 'item_number': item_number, + 'description': description, + 'quantity': quantity, + 'unit_price': unit_price, + 'total_price': total_price, + 'raw_text': f"{line} ... {quantity}ST {unit_price} {total_price}" + }) + logger.info(f"✅ ALSO: {item_number} - {description[:30]}...") + i += 1 + continue + + # Pattern 2: DCS format - "195006Betalingsmetode... 
141,2041,20" + dcs_match = re.match(r'^(\d{1,2})(\d{4,6})([^0-9]+?)\s+(\d+)([\d,]+)([\d,]+)$', line) + if dcs_match: + items.append({ + 'line_number': len(items) + 1, + 'position': dcs_match.group(1), + 'item_number': dcs_match.group(2), + 'description': dcs_match.group(3).strip(), + 'quantity': dcs_match.group(4), + 'unit_price': dcs_match.group(5).replace(',', '.'), + 'total_price': dcs_match.group(6).replace(',', '.'), + 'raw_text': line + }) + logger.info(f"✅ DCS: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...") + i += 1 + continue + + i += 1 + + if items: + logger.info(f"📦 Multi-line extraction found {len(items)} items") + else: + logger.warning("⚠️ Multi-line extraction found no items") + return items + + +# ========== CRUD OPERATIONS ========== + +@router.get("/supplier-invoices") +async def list_supplier_invoices( + status: Optional[str] = None, + vendor_id: Optional[int] = None, + overdue_only: bool = False +): + """ + List all supplier invoices with filtering options + + Args: + status: Filter by status (pending, approved, sent_to_economic, paid, overdue, cancelled) + vendor_id: Filter by vendor + overdue_only: Only show overdue unpaid invoices + """ + try: + query = """ + SELECT + si.*, + v.name as vendor_full_name, + v.economic_supplier_number as vendor_economic_id, + CASE + WHEN si.paid_date IS NOT NULL THEN 'paid' + WHEN si.due_date < CURRENT_DATE AND si.paid_date IS NULL THEN 'overdue' + ELSE si.status + END as computed_status + FROM supplier_invoices si + LEFT JOIN vendors v ON si.vendor_id = v.id + WHERE 1=1 + """ + params = [] + + if status: + query += " AND si.status = %s" + params.append(status) + + if vendor_id: + query += " AND si.vendor_id = %s" + params.append(vendor_id) + + if overdue_only: + query += " AND si.due_date < CURRENT_DATE AND si.paid_date IS NULL" + + query += " ORDER BY si.due_date ASC, si.invoice_date DESC" + + invoices = execute_query(query, tuple(params) if params else ()) + + # Add lines to each invoice + for invoice 
in invoices: + lines = execute_query( + "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number", + (invoice['id'],) + ) + invoice['lines'] = lines + + return invoices + + except Exception as e: + logger.error(f"❌ Failed to list supplier invoices: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/supplier-invoices/pending-files") +async def get_pending_files(): + """Hent liste over filer der venter på behandling""" + try: + files = execute_query( + """SELECT file_id, filename, status, uploaded_at, error_message, template_id + FROM incoming_files + WHERE status IN ('pending', 'processing', 'failed') + ORDER BY uploaded_at DESC""" + ) + return {"files": files if files else [], "count": len(files) if files else 0} + except Exception as e: + logger.error(f"❌ Failed to get pending files: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# ========== TEMPLATE MANAGEMENT (must be before {invoice_id} route) ========== + +@router.get("/supplier-invoices/templates") +async def list_templates(): + """Hent alle templates""" + try: + query = """ + SELECT t.*, v.name as vendor_name + FROM supplier_invoice_templates t + LEFT JOIN vendors v ON t.vendor_id = v.id + WHERE t.is_active = true + ORDER BY t.created_at DESC + """ + templates = execute_query(query) + + return templates if templates else [] + except Exception as e: + logger.error(f"❌ Failed to list templates: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/supplier-invoices/{invoice_id}") +async def get_supplier_invoice(invoice_id: int): + """Get single supplier invoice with lines""" + try: + invoice = execute_query( + """SELECT si.*, v.name as vendor_full_name, v.economic_supplier_number as vendor_economic_id + FROM supplier_invoices si + LEFT JOIN vendors v ON si.vendor_id = v.id + WHERE si.id = %s""", + (invoice_id,), + fetchone=True + ) + + if not invoice: + raise HTTPException(status_code=404, detail=f"Invoice 
{invoice_id} not found") + + # Get lines + lines = execute_query( + "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number", + (invoice_id,) + ) + invoice['lines'] = lines + + return invoice + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to get supplier invoice {invoice_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/supplier-invoices") +async def create_supplier_invoice(data: Dict): + """ + Create new supplier invoice + + Required fields: + - invoice_number: str + - vendor_id: int + - invoice_date: str (YYYY-MM-DD) + - total_amount: float + + Optional fields: + - due_date: str (YYYY-MM-DD) - defaults to invoice_date + 30 days + - vat_amount: float + - net_amount: float + - currency: str (default 'DKK') + - description: str + - notes: str + - lines: List[Dict] with line items + """ + try: + # Validate required fields + required = ['invoice_number', 'vendor_id', 'invoice_date', 'total_amount'] + missing = [f for f in required if f not in data] + if missing: + raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}") + + # Calculate due_date if not provided (30 days default) + invoice_date = datetime.fromisoformat(data['invoice_date']) + due_date = data.get('due_date') + if not due_date: + due_date = (invoice_date + timedelta(days=30)).strftime('%Y-%m-%d') + + # Insert supplier invoice + invoice_id = execute_insert( + """INSERT INTO supplier_invoices + (invoice_number, vendor_id, vendor_name, invoice_date, due_date, + total_amount, vat_amount, net_amount, currency, description, notes, + status, created_by) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'pending', %s)""", + ( + data['invoice_number'], + data['vendor_id'], + data.get('vendor_name'), + data['invoice_date'], + due_date, + data['total_amount'], + data.get('vat_amount', 0), + data.get('net_amount', data['total_amount']), + data.get('currency', 'DKK'), + 
data.get('description'), + data.get('notes'), + data.get('created_by') + ) + ) + + # Insert lines if provided + if data.get('lines'): + for idx, line in enumerate(data['lines'], start=1): + execute_insert( + """INSERT INTO supplier_invoice_lines + (supplier_invoice_id, line_number, description, quantity, unit_price, + line_total, vat_code, vat_rate, vat_amount, contra_account, sku) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", + ( + invoice_id, + line.get('line_number', idx), + line.get('description'), + line.get('quantity', 1), + line.get('unit_price', 0), + line.get('line_total', 0), + line.get('vat_code', 'I25'), + line.get('vat_rate', 25.00), + line.get('vat_amount', 0), + line.get('contra_account', '5810'), + line.get('sku') + ) + ) + + logger.info(f"✅ Created supplier invoice: {data['invoice_number']} (ID: {invoice_id})") + + return { + "success": True, + "invoice_id": invoice_id, + "invoice_number": data['invoice_number'], + "due_date": due_date + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to create supplier invoice: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.put("/supplier-invoices/{invoice_id}") +async def update_supplier_invoice(invoice_id: int, data: Dict): + """Update supplier invoice details""" + try: + # Check if invoice exists + existing = execute_query( + "SELECT id, status FROM supplier_invoices WHERE id = %s", + (invoice_id,), + fetchone=True + ) + + if not existing: + raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") + + # Don't allow editing if already sent to e-conomic + if existing['status'] == 'sent_to_economic': + raise HTTPException( + status_code=400, + detail="Cannot edit invoice that has been sent to e-conomic" + ) + + # Build update query dynamically based on provided fields + update_fields = [] + params = [] + + allowed_fields = ['invoice_number', 'vendor_id', 'vendor_name', 'invoice_date', + 'due_date', 'total_amount', 
'vat_amount', 'net_amount', + 'currency', 'description', 'notes', 'status'] + + for field in allowed_fields: + if field in data: + update_fields.append(f"{field} = %s") + params.append(data[field]) + + if not update_fields: + raise HTTPException(status_code=400, detail="No fields to update") + + params.append(invoice_id) + + query = f""" + UPDATE supplier_invoices + SET {', '.join(update_fields)}, updated_at = CURRENT_TIMESTAMP + WHERE id = %s + """ + + execute_update(query, tuple(params)) + + logger.info(f"✅ Updated supplier invoice {invoice_id}") + + return {"success": True, "invoice_id": invoice_id} + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to update supplier invoice {invoice_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.delete("/supplier-invoices/{invoice_id}") +async def delete_supplier_invoice(invoice_id: int): + """Delete supplier invoice (soft delete if integrated with e-conomic)""" + try: + invoice = execute_query( + "SELECT id, invoice_number, economic_voucher_number FROM supplier_invoices WHERE id = %s", + (invoice_id,), + fetchone=True + ) + + if not invoice: + raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") + + # If sent to e-conomic, only mark as cancelled (don't delete) + if invoice.get('economic_voucher_number'): + execute_update( + "UPDATE supplier_invoices SET status = 'cancelled', updated_at = CURRENT_TIMESTAMP WHERE id = %s", + (invoice_id,) + ) + logger.info(f"⚠️ Marked supplier invoice {invoice['invoice_number']} as cancelled (sent to e-conomic)") + return {"success": True, "message": "Invoice marked as cancelled", "invoice_id": invoice_id} + + # Otherwise, delete invoice and lines + execute_update("DELETE FROM supplier_invoice_lines WHERE supplier_invoice_id = %s", (invoice_id,)) + execute_update("DELETE FROM supplier_invoices WHERE id = %s", (invoice_id,)) + + logger.info(f"🗑️ Deleted supplier invoice {invoice['invoice_number']} (ID: 
{invoice_id})") + + return {"success": True, "message": "Invoice deleted", "invoice_id": invoice_id} + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to delete supplier invoice {invoice_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# ========== E-CONOMIC INTEGRATION ========== + +@router.post("/supplier-invoices/{invoice_id}/approve") +async def approve_supplier_invoice(invoice_id: int, approved_by: str): + """Approve supplier invoice for payment""" + try: + invoice = execute_query( + "SELECT id, invoice_number, status FROM supplier_invoices WHERE id = %s", + (invoice_id,), + fetchone=True + ) + + if not invoice: + raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") + + if invoice['status'] != 'pending': + raise HTTPException(status_code=400, detail=f"Invoice is already {invoice['status']}") + + execute_update( + """UPDATE supplier_invoices + SET status = 'approved', approved_by = %s, approved_at = CURRENT_TIMESTAMP + WHERE id = %s""", + (approved_by, invoice_id) + ) + + logger.info(f"✅ Approved supplier invoice {invoice['invoice_number']} by {approved_by}") + + return {"success": True, "invoice_id": invoice_id, "approved_by": approved_by} + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to approve invoice {invoice_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/supplier-invoices/{invoice_id}/send-to-economic") +async def send_to_economic(invoice_id: int): + """ + Send approved supplier invoice to e-conomic kassekladde + Creates voucher entry in e-conomic journals + """ + try: + # Get invoice with lines + invoice = execute_query( + """SELECT si.*, v.economic_supplier_number as vendor_economic_id, v.name as vendor_full_name + FROM supplier_invoices si + LEFT JOIN vendors v ON si.vendor_id = v.id + WHERE si.id = %s""", + (invoice_id,), + fetchone=True + ) + + if not invoice: + raise 
HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found") + + if invoice['status'] != 'approved': + raise HTTPException(status_code=400, detail="Invoice must be approved before sending to e-conomic") + + if invoice.get('economic_voucher_number'): + raise HTTPException(status_code=400, detail="Invoice already sent to e-conomic") + + # Get lines + lines = execute_query( + "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number", + (invoice_id,) + ) + + if not lines: + raise HTTPException(status_code=400, detail="Invoice must have at least one line item") + + # Check if vendor exists in e-conomic + economic = get_economic_service() + + vendor_economic_id = invoice.get('vendor_economic_id') + + # If vendor not in e-conomic, create it + if not vendor_economic_id: + vendor_result = await economic.search_supplier_by_name(invoice.get('vendor_full_name') or invoice.get('vendor_name')) + + if vendor_result: + vendor_economic_id = vendor_result['supplierNumber'] + # Update local vendor record + execute_update( + "UPDATE vendors SET economic_supplier_number = %s WHERE id = %s", + (vendor_economic_id, invoice['vendor_id']) + ) + else: + # Create new supplier in e-conomic + new_supplier = await economic.create_supplier({ + 'name': invoice.get('vendor_full_name') or invoice.get('vendor_name'), + 'currency': invoice.get('currency', 'DKK') + }) + + if new_supplier and new_supplier.get('supplierNumber'): + vendor_economic_id = new_supplier['supplierNumber'] + else: + raise HTTPException(status_code=500, detail="Failed to create supplier in e-conomic") + + # Get default journal number from settings + journal_setting = execute_query( + "SELECT setting_value FROM supplier_invoice_settings WHERE setting_key = 'economic_default_journal'", + fetchone=True + ) + journal_number = int(journal_setting['setting_value']) if journal_setting else 1 + + # Build VAT breakdown from lines + vat_breakdown = {} + line_items = [] + + for line in lines: + 
vat_code = line.get('vat_code', 'I25') + + if vat_code not in vat_breakdown: + vat_breakdown[vat_code] = { + 'net': 0, + 'vat': 0, + 'gross': 0, + 'rate': line.get('vat_rate', 25.00) + } + + line_total = float(line.get('line_total', 0)) + vat_amount = float(line.get('vat_amount', 0)) + net_amount = line_total - vat_amount + + vat_breakdown[vat_code]['net'] += net_amount + vat_breakdown[vat_code]['vat'] += vat_amount + vat_breakdown[vat_code]['gross'] += line_total + + line_items.append({ + 'description': line.get('description'), + 'quantity': float(line.get('quantity', 1)), + 'unit_price': float(line.get('unit_price', 0)), + 'line_total': line_total, + 'vat_code': vat_code, + 'vat_amount': vat_amount, + 'contra_account': line.get('contra_account', '5810'), + 'sku': line.get('sku') + }) + + # Send to e-conomic + result = await economic.create_journal_supplier_invoice( + journal_number=journal_number, + supplier_number=vendor_economic_id, + invoice_number=invoice['invoice_number'], + invoice_date=invoice['invoice_date'].isoformat() if isinstance(invoice['invoice_date'], date) else invoice['invoice_date'], + total_amount=float(invoice['total_amount']), + vat_breakdown=vat_breakdown, + line_items=line_items, + due_date=invoice['due_date'].isoformat() if invoice.get('due_date') and isinstance(invoice['due_date'], date) else invoice.get('due_date'), + text=invoice.get('description') or f"Supplier invoice {invoice['invoice_number']}" + ) + + if result.get('error'): + raise HTTPException(status_code=500, detail=result.get('message', 'Failed to create voucher in e-conomic')) + + # Update invoice with e-conomic details + execute_update( + """UPDATE supplier_invoices + SET status = 'sent_to_economic', + economic_supplier_number = %s, + economic_journal_number = %s, + economic_voucher_number = %s, + economic_accounting_year = %s, + sent_to_economic_at = CURRENT_TIMESTAMP + WHERE id = %s""", + ( + vendor_economic_id, + result['journal_number'], + result['voucher_number'], + 
result['accounting_year'], + invoice_id + ) + ) + + # Upload attachment if file_path exists + if invoice.get('file_path') and os.path.exists(invoice['file_path']): + attachment_result = await economic.upload_voucher_attachment( + journal_number=result['journal_number'], + accounting_year=result['accounting_year'], + voucher_number=result['voucher_number'], + pdf_path=invoice['file_path'], + filename=f"{invoice['invoice_number']}.pdf" + ) + + if attachment_result.get('success'): + logger.info(f"📎 Uploaded attachment for voucher {result['voucher_number']}") + + logger.info(f"✅ Sent supplier invoice {invoice['invoice_number']} to e-conomic (voucher #{result['voucher_number']})") + + return { + "success": True, + "invoice_id": invoice_id, + "voucher_number": result['voucher_number'], + "journal_number": result['journal_number'], + "accounting_year": result['accounting_year'] + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to send invoice {invoice_id} to e-conomic: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/supplier-invoices/economic/journals") +async def get_economic_journals(): + """Get available e-conomic journals (kassekladder)""" + try: + economic = get_economic_service() + journals = await economic.get_supplier_invoice_journals() + return {"journals": journals} + + except Exception as e: + logger.error(f"❌ Failed to get e-conomic journals: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# ========== STATISTICS & REPORTS ========== + +@router.get("/supplier-invoices/stats/overview") +async def get_payment_overview(): + """ + Get overview of supplier invoices payment status + + Returns stats for total, paid, overdue, due soon, and pending invoices + """ + try: + today = date.today().isoformat() + + stats = execute_query(""" + SELECT + COUNT(*) as total_count, + SUM(CASE WHEN paid_date IS NOT NULL THEN 1 ELSE 0 END) as paid_count, + SUM(CASE WHEN paid_date IS NULL AND 
due_date < %s THEN 1 ELSE 0 END) as overdue_count, + SUM(CASE WHEN paid_date IS NULL AND due_date >= %s AND due_date <= (%s::date + INTERVAL '7 days') THEN 1 ELSE 0 END) as due_soon_count, + SUM(CASE WHEN paid_date IS NULL AND (due_date IS NULL OR due_date > (%s::date + INTERVAL '7 days')) THEN 1 ELSE 0 END) as pending_count, + SUM(total_amount) as total_amount, + SUM(CASE WHEN paid_date IS NOT NULL THEN total_amount ELSE 0 END) as paid_amount, + SUM(CASE WHEN paid_date IS NULL THEN total_amount ELSE 0 END) as unpaid_amount, + SUM(CASE WHEN paid_date IS NULL AND due_date < %s THEN total_amount ELSE 0 END) as overdue_amount + FROM supplier_invoices + WHERE status != 'cancelled' + """, (today, today, today, today, today), fetchone=True) + + return { + "total_invoices": stats.get('total_count', 0) if stats else 0, + "paid_count": stats.get('paid_count', 0) if stats else 0, + "overdue_count": stats.get('overdue_count', 0) if stats else 0, + "due_soon_count": stats.get('due_soon_count', 0) if stats else 0, + "pending_count": stats.get('pending_count', 0) if stats else 0, + "total_amount": float(stats.get('total_amount', 0) or 0) if stats else 0, + "paid_amount": float(stats.get('paid_amount', 0) or 0) if stats else 0, + "unpaid_amount": float(stats.get('unpaid_amount', 0) or 0) if stats else 0, + "overdue_amount": float(stats.get('overdue_amount', 0) or 0) if stats else 0 + } + + except Exception as e: + logger.error(f"❌ Failed to get payment overview: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/supplier-invoices/stats/by-vendor") +async def get_stats_by_vendor(): + """Get supplier invoice statistics grouped by vendor""" + try: + stats = execute_query(""" + SELECT + v.id as vendor_id, + v.name as vendor_name, + COUNT(si.id) as invoice_count, + SUM(si.total_amount) as total_amount, + SUM(CASE WHEN si.paid_date IS NULL THEN si.total_amount ELSE 0 END) as unpaid_amount, + MAX(si.due_date) as latest_due_date + FROM vendors v + LEFT JOIN 
supplier_invoices si ON v.id = si.vendor_id + WHERE si.status != 'cancelled' OR si.status IS NULL + GROUP BY v.id, v.name + HAVING COUNT(si.id) > 0 + ORDER BY unpaid_amount DESC + """) + + return {"vendor_stats": stats} + + except Exception as e: + logger.error(f"❌ Failed to get vendor stats: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# ========== UPLOAD & AI EXTRACTION ========== + +@router.post("/supplier-invoices/upload") +async def upload_supplier_invoice(file: UploadFile = File(...)): + """ + Upload supplier invoice (PDF/image) and extract data using templates + + Process: + 1. Validate file type and size + 2. Calculate SHA256 checksum for duplicate detection + 3. Save file to uploads directory + 4. Extract text (PDF/OCR) + 5. Match template based on PDF content + 6. Extract fields using template regex patterns + 7. Show form with pre-filled data for user review + + Returns: + { + "status": "success|duplicate|needs_review", + "file_id": int, + "template_matched": bool, + "template_id": int, + "extracted_fields": dict, + "confidence": float, + "pdf_text": str # For manual review + } + """ + try: + # Validate file extension + suffix = Path(file.filename).suffix.lower() + if suffix not in settings.ALLOWED_EXTENSIONS: + raise HTTPException( + status_code=400, + detail=f"Filtype {suffix} ikke tilladt. 
Tilladte: {', '.join(settings.ALLOWED_EXTENSIONS)}" + ) + + # Create upload directory + upload_dir = Path(settings.UPLOAD_DIR) + upload_dir.mkdir(parents=True, exist_ok=True) + + # Save file temporarily to calculate checksum + temp_path = upload_dir / f"temp_{datetime.now().timestamp()}_{file.filename}" + + try: + # Validate file size while saving + max_size = settings.MAX_FILE_SIZE_MB * 1024 * 1024 + total_size = 0 + + with open(temp_path, "wb") as buffer: + while chunk := await file.read(8192): + total_size += len(chunk) + if total_size > max_size: + temp_path.unlink(missing_ok=True) + raise HTTPException( + status_code=413, + detail=f"Fil for stor (max {settings.MAX_FILE_SIZE_MB}MB)" + ) + buffer.write(chunk) + + logger.info(f"📥 Uploaded file: {file.filename} ({total_size} bytes)") + + # Calculate SHA256 checksum + checksum = ollama_service.calculate_file_checksum(temp_path) + + # Check for duplicate file + existing_file = execute_query( + "SELECT file_id, status FROM incoming_files WHERE checksum = %s", + (checksum,), + fetchone=True + ) + + if existing_file: + temp_path.unlink(missing_ok=True) + logger.warning(f"⚠️ Duplicate file detected: {checksum[:16]}...") + + # Get existing invoice if linked + existing_invoice = execute_query( + """SELECT si.* FROM supplier_invoices si + JOIN extractions e ON si.extraction_id = e.extraction_id + WHERE e.file_id = %s""", + (existing_file['file_id'],), + fetchone=True + ) + + return { + "status": "duplicate", + "message": "Denne fil er allerede uploadet", + "file_id": existing_file['file_id'], + "invoice_id": existing_invoice['id'] if existing_invoice else None + } + + # Rename to permanent name + final_path = upload_dir / file.filename + counter = 1 + while final_path.exists(): + final_path = upload_dir / f"{final_path.stem}_{counter}{final_path.suffix}" + counter += 1 + + temp_path.rename(final_path) + logger.info(f"💾 Saved file as: {final_path.name}") + + # Insert file record + file_record = execute_query( + """INSERT 
INTO incoming_files + (filename, original_filename, file_path, file_size, mime_type, checksum, status) + VALUES (%s, %s, %s, %s, %s, %s, 'processing') RETURNING file_id""", + (final_path.name, file.filename, str(final_path), total_size, + ollama_service._get_mime_type(final_path), checksum), + fetchone=True + ) + file_id = file_record['file_id'] + + # Extract text from file + logger.info(f"📄 Extracting text from {final_path.suffix}...") + text = await ollama_service._extract_text_from_file(final_path) + + # Try template matching + logger.info(f"📋 Matching template...") + template_id, confidence = template_service.match_template(text) + + extracted_fields = {} + vendor_id = None + + if template_id and confidence >= 0.5: + # Extract fields using template + logger.info(f"✅ Using template {template_id} ({confidence:.0%} confidence)") + extracted_fields = template_service.extract_fields(text, template_id) + + # Get vendor from template + template = template_service.templates_cache.get(template_id) + if template: + vendor_id = template.get('vendor_id') + + # Log usage + template_service.log_usage(template_id, file_id, True, confidence, extracted_fields) + + # Update file record + execute_update( + """UPDATE incoming_files + SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (template_id, file_id) + ) + else: + logger.info("ℹ️ No template matched - manual entry required") + execute_update( + """UPDATE incoming_files + SET status = 'pending', processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (file_id,) + ) + + # Return data for user to review and confirm + return { + "status": "needs_review", + "file_id": file_id, + "template_matched": template_id is not None, + "template_id": template_id, + "vendor_id": vendor_id, + "confidence": confidence, + "extracted_fields": extracted_fields, + "pdf_text": text[:500], # First 500 chars for reference + "message": "Upload gennemført - gennemgå og bekræft data" + } + + 
except HTTPException:
+            raise
+        except Exception as e:
+            logger.error(f"❌ Upload failed (inner): {e}", exc_info=True)
+            raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"❌ Upload failed (outer): {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
+
+
+# ========== ECONOMIC SYNC ==========
+
+
+
+@router.post("/supplier-invoices/{invoice_id}/send-to-economic")
+async def send_invoice_to_economic(invoice_id: int):
+    """Send supplier invoice to e-conomic - requires separate implementation"""
+    raise HTTPException(status_code=501, detail="e-conomic integration kommer senere")
+
+
+@router.post("/supplier-invoices/reprocess/{file_id}")
+async def reprocess_uploaded_file(file_id: int):
+    """
+    Genbehandl en uploadet fil med template matching
+    Bruges til at behandle filer der fejlede eller ikke blev færdigbehandlet
+    """
+    try:
+        # Get file record
+        file_record = execute_query(
+            "SELECT * FROM incoming_files WHERE file_id = %s",
+            (file_id,),
+            fetchone=True
+        )
+
+        if not file_record:
+            raise HTTPException(status_code=404, detail=f"Fil {file_id} ikke fundet")
+
+        file_path = Path(file_record['file_path'])
+        if not file_path.exists():
+            raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}")
+
+        logger.info(f"🔄 Genbehandler fil {file_id}: {file_record['filename']}")
+
+        # Extract text from file
+        text = await ollama_service._extract_text_from_file(file_path)
+
+        # Try template matching
+        template_id, confidence = template_service.match_template(text)
+
+        extracted_fields = {}
+        vendor_id = None
+
+        if template_id and confidence >= 0.5:
+            logger.info(f"✅ Matched template {template_id} ({confidence:.0%})")
+            extracted_fields = template_service.extract_fields(text, template_id)
+
+            template = template_service.templates_cache.get(template_id)
+            if template:
+                vendor_id = template.get('vendor_id')
+
+            
template_service.log_usage(template_id, file_id, True, confidence, extracted_fields) + + execute_update( + """UPDATE incoming_files + SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (template_id, file_id) + ) + else: + logger.info("ℹ️ Ingen template match") + execute_update( + """UPDATE incoming_files + SET status = 'pending', processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (file_id,) + ) + + return { + "status": "success", + "file_id": file_id, + "filename": file_record['filename'], + "template_matched": template_id is not None, + "template_id": template_id, + "vendor_id": vendor_id, + "confidence": confidence, + "extracted_fields": extracted_fields, + "pdf_text": text # Return full text for template builder + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Reprocess failed for file {file_id}: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Genbehandling fejlede: {str(e)}") + + +# ========== TEMPLATE MANAGEMENT ========== + +@router.post("/supplier-invoices/ai-analyze") +async def ai_analyze_invoice(request: Dict): + """Brug AI til at analysere faktura og foreslå template felter""" + try: + pdf_text = request.get('pdf_text', '') + vendor_id = request.get('vendor_id') + + if not pdf_text: + raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet") + + # Build enhanced PDF text with instruction + enhanced_text = f"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering. + +RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON! + +REQUIRED STRUKTUR (alle felter skal med): +{{ + "invoice_number": "5082481", + "invoice_date": "24/10-25", + "total_amount": "1471.20", + "cvr": "29522790", + "detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"], + "lines_start": "Nr.VarenrTekst", + "lines_end": "Subtotal" +}} + +FIND FØLGENDE: +1. 
invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice") +2. invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY) +3. total_amount: Total beløb + - Søg efter "Total", "I alt", "Totalbeløb" + - Hvis beløbet er på næste linje, match sidste tal + - Format: [\d.,]+ (f.eks. 1.471,20 eller 1471.20) +4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK") +5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren + - Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S") + - Website eller email (f.eks. "WWW.DCS.DK") + - Adresse element (f.eks. "Høgemosevænget", "Mårkærvej") + - UNDGÅ generiske ord som "Faktura", "Danmark", "Side" +6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr") +7. lines_end: Tekst EFTER varelinjer (f.eks. "Subtotal", "I alt", "Side 1 af") + +VIGTIGT: +- detection_patterns SKAL være mindst 3 specifikke tekststrenge +- Vælg tekststrenge der er UNIKKE for denne leverandør +- LAV IKKE patterns eller line_item - kun udtræk data + +PDF TEKST: +{pdf_text[:2000]} + +RETURNER KUN JSON - intet andet!""" + + # Call Ollama + logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst") + result = await ollama_service.extract_from_text(enhanced_text) + + if not result: + raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen") + + logger.info(f"✅ AI analyse gennemført: {result}") + return result + + except Exception as e: + logger.error(f"❌ AI analyse fejlede: {e}") + raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}") + + +@router.post("/supplier-invoices/templates") +async def create_template(request: Dict): + """ + Opret ny template + + Request body: + { + "vendor_id": 1, + "template_name": "Test Template", + "detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}], + "field_mappings": {"invoice_number": {"pattern": "Nummer\\s*(\\d+)", "group": 1}} + } + """ + try: + import json + + vendor_id = 
request.get('vendor_id') + template_name = request.get('template_name') + detection_patterns = request.get('detection_patterns', []) + field_mappings = request.get('field_mappings', {}) + + if not vendor_id or not template_name: + raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet") + + # Insert template and get template_id + query = """ + INSERT INTO supplier_invoice_templates + (vendor_id, template_name, detection_patterns, field_mappings) + VALUES (%s, %s, %s, %s) + RETURNING template_id + """ + result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings))) + template_id = result[0]['template_id'] if result else None + + if not template_id: + raise HTTPException(status_code=500, detail="Kunne ikke oprette template") + + # Reload templates in cache + template_service.reload_templates() + + logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}") + return {"template_id": template_id, "message": "Template oprettet"} + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to create template: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + +@router.put("/supplier-invoices/templates/{template_id}") +async def update_template( + template_id: int, + template_name: Optional[str] = None, + detection_patterns: Optional[List[Dict]] = None, + field_mappings: Optional[Dict] = None, + is_active: Optional[bool] = None +): + """Opdater eksisterende template""" + try: + import json + + updates = [] + params = [] + + if template_name: + updates.append("template_name = %s") + params.append(template_name) + if detection_patterns is not None: + updates.append("detection_patterns = %s") + params.append(json.dumps(detection_patterns)) + if field_mappings is not None: + updates.append("field_mappings = %s") + params.append(json.dumps(field_mappings)) + if is_active is not None: + updates.append("is_active 
@router.post("/supplier-invoices/templates/{template_id}/test")
async def test_template(template_id: int, request: Dict):
    """
    Test a template against raw PDF text without saving anything.

    Request body:
        {"pdf_text": "Full PDF text content..."}

    Returns:
        {
            "matched": bool,            # confidence >= 0.7
            "confidence": float,        # 0.0-1.0, rounded to 2 decimals
            "extracted_fields": {...},  # header fields (invoice number, date, ...)
            "line_items": [...],        # parsed invoice lines (may be empty)
            "detection_results": [...], # per-pattern hit/miss with weights
            "template_name": str
        }

    Raises:
        HTTPException 400: pdf_text missing.
        HTTPException 404: unknown template_id.
        HTTPException 500: unexpected failure.
    """
    try:
        pdf_text = request.get('pdf_text', '')
        if not pdf_text:
            raise HTTPException(status_code=400, detail="pdf_text er påkrævet")

        # Fetch the template row.
        rows = execute_query(
            "SELECT * FROM supplier_invoice_templates WHERE template_id = %s",
            (template_id,)
        )
        if not rows:
            raise HTTPException(status_code=404, detail="Template ikke fundet")

        template = rows[0]
        detection_patterns = template.get('detection_patterns', [])
        field_mappings = template.get('field_mappings', {})

        # --- Score detection patterns (weighted substring hits) ---
        total_score = 0.0
        max_score = 0.0
        detection_results = []

        for pattern in detection_patterns:
            pattern_type = pattern.get('type', 'text')
            pattern_value = pattern.get('pattern', '')
            weight = float(pattern.get('weight', 0.5))
            max_score += weight

            found = pattern_type == 'text' and pattern_value in pdf_text
            if found:
                total_score += weight

            detection_results.append({
                "pattern": pattern_value,
                "type": pattern_type,
                "found": found,
                "weight": weight
            })

        confidence = (total_score / max_score) if max_score > 0 else 0.0
        matched = confidence >= 0.7  # match threshold

        extracted_fields = {}
        line_items = []

        # FIX: the original had three separate `if matched:` blocks; merged into one.
        if matched:
            # --- Header fields (skip the line-item meta mappings) ---
            for field_name, field_config in field_mappings.items():
                if field_name in ('lines_start', 'lines_end', 'line_item'):
                    continue

                pattern = field_config.get('pattern', '')
                group = field_config.get('group', 1)

                try:
                    match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE)
                    if match and len(match.groups()) >= group:
                        value = match.group(group)
                        # FIX: an optional regex group can match as None — guard
                        # before .strip() instead of raising AttributeError.
                        if value is not None:
                            extracted_fields[field_name] = value.strip()
                except Exception as e:
                    logger.warning(f"Pattern match failed for {field_name}: {e}")

            # --- Line items ---
            lines_start = field_mappings.get('lines_start', {}).get('pattern')
            lines_end = field_mappings.get('lines_end', {}).get('pattern')
            line_pattern = field_mappings.get('line_item', {}).get('pattern')
            line_fields = field_mappings.get('line_item', {}).get('fields', [])

            if line_pattern or lines_start:
                # Narrow the text to the section between the start/end markers.
                text_section = pdf_text
                if lines_start:
                    try:
                        start_match = re.search(lines_start, pdf_text, re.IGNORECASE)
                        if start_match:
                            text_section = pdf_text[start_match.end():]
                            logger.debug(f"Found lines_start at position {start_match.end()}")
                    except Exception as e:
                        logger.warning(f"Failed to find lines_start: {e}")

                if lines_end:
                    try:
                        end_match = re.search(lines_end, text_section, re.IGNORECASE)
                        if end_match:
                            text_section = text_section[:end_match.start()]
                            logger.debug(f"Found lines_end at position {end_match.start()}")
                    except Exception as e:
                        logger.warning(f"Failed to find lines_end: {e}")

                # Explicit per-line regex first...
                if line_pattern:
                    try:
                        for m in re.finditer(line_pattern, text_section, re.MULTILINE):
                            line_data = {
                                'line_number': len(line_items) + 1,
                                'raw_text': m.group(0)
                            }
                            for idx, fname in enumerate(line_fields, start=1):
                                # Same optional-group guard as for header fields.
                                if idx <= len(m.groups()) and m.group(idx) is not None:
                                    line_data[fname] = m.group(idx).strip()
                            line_items.append(line_data)
                    except Exception as e:
                        logger.error(f"❌ Pattern extraction failed: {e}")

                # ...then the heuristic fallback when the regex found nothing.
                if not line_items:
                    logger.info("🧠 Trying smart extraction...")
                    logger.debug(f"Text section length: {len(text_section)}, first 500 chars: {text_section[:500]}")
                    line_items = _smart_extract_lines(text_section)
                    logger.info(f"🧠 Smart extraction returned {len(line_items)} items")

                if line_items:
                    logger.info(f"📦 Extracted {len(line_items)} line items from test")
                else:
                    logger.warning(f"⚠️ No line items matched. Section length: {len(text_section)} chars")
                    logger.debug(f"Section preview: {text_section[:300]}")

        logger.info(f"🧪 Template {template_id} test: matched={matched}, confidence={confidence:.2f}, lines={len(line_items)}")

        return {
            "matched": matched,
            "confidence": round(confidence, 2),
            "extracted_fields": extracted_fields,
            "line_items": line_items,
            "detection_results": detection_results,
            "template_name": template.get('template_name', '')
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Template test failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.delete("/supplier-invoices/templates/{template_id}")
async def delete_template(template_id: int):
    """Soft-delete a template (set is_active = false) and refresh the cache."""
    try:
        execute_update(
            "UPDATE supplier_invoice_templates SET is_active = false WHERE template_id = %s",
            (template_id,)
        )

        template_service.reload_templates()

        logger.info(f"✅ Template {template_id} deaktiveret")
        return {"message": "Template slettet"}
    except Exception as e:
        logger.error(f"❌ Failed to delete template: {e}")
        raise HTTPException(status_code=500, detail=str(e))
+ + +
+
+

📋 Leverandørfakturaer

+

Kassekladde - Integration med e-conomic

+
+
+ + Se Templates + + + Template Builder + + +
+
+ + +
+
+
+

-

+

Overskredet

+ - +
+
+
+
+

-

+

Forfald inden 7 dage

+ - +
+
+
+
+

-

+

Afventer behandling

+ - +
+
+
+
+

-

+

Ubetalt i alt

+ - +
+
+
+ + +
+
+
+
+ Alle +
+
+ Afventer +
+
+ Godkendt +
+
+ Sendt til e-conomic +
+
+ Overskredet +
+
+
+
+ + +
+
+
+ + + + + + + + + + + + + + + + + + +
Fakturanr.LeverandørFakturadatoForfaldsdatoBeløbStatuse-conomicHandlinger
+
+ Indlæser... +
+
+
+
+
+
+ + + + + + + + + + + + + + + diff --git a/app/billing/frontend/template_builder.html b/app/billing/frontend/template_builder.html new file mode 100644 index 0000000..0052cec --- /dev/null +++ b/app/billing/frontend/template_builder.html @@ -0,0 +1,1261 @@ + + + + + + Template Builder - BMC Hub + + + + + + +
+
+
+
+
+

Template Builder

+

Byg templates til automatisk faktura-udtrækning

+
+ + Tilbage til Kassekladde + +
+ + +
+
+ Vælg Fil +
+
+ Vælg Leverandør +
+
+ Definer Patterns +
+
+ Test & Gem +
+
+ + +
+
+
Vælg Fil til Template
+
+
+
+ +
+
+
+ + +
+
+
Vælg Leverandør
+
+
+
+
+
PDF Preview
+
+ +
+
+
+
+ + + Vælg den leverandør som fakturaen kommer fra +
+
+ + + Navn på templaten, f.eks. leverandør + "Standard" eller "Email faktura" +
+ +
+
+
+
+ + +
+
+
Definer Udtrækningsmønstre
+
+
+
+
+
PDF Tekst Preview
+
+ + Sådan gør du:
+ 1. Klik "🤖 AI Auto-generer" for at lade AI finde alle felter automatisk
+ 2. Eller markér tekst manuelt og vælg felttype
+ 3. Systemet laver automatisk patterns! +
+ + + + +
+
+ Markeret tekst: + Ingen +
+
+ Hoved-felter: +
+
+ + + + +
+
+ Varelinjer: +
+
+ + + +
+
+ +
+ +
+
+
+
Udtrækningsmønstre
+ + +
+ + Tekststrenge der identificerer leverandøren/layout +
+ Markér tekst i PDF og klik "Detektion" knappen +
+
+ + +
+ +
+ + +
+ +
+
+ +
+ +
+ + +
+ +
+
+ +
+ +
+ + +
+ +
+
+ +
+ +
+ + +
+ +
+
+ +
+ +
Varelinjer Udtrækning (valgfri)
+
+ + Avanceret: Definer hvordan varelinjer skal findes i PDF'en. Dette er valgfrit - du kan også tilføje linjer manuelt senere. +
+ +
+ + + Tekst der vises lige før første varelinje starter +
+ +
+ + + Tekst der vises efter sidste varelinje +
+ +
+ + + Regex til at udtrække: varenummer, beskrivelse, antal, pris fra hver linje + +
+
+
Eksempel fra din PDF:
+
195006Betalingsmetode (Kortbetaling) 141,2041,20
+ +
Pattern forklaring:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DelPatternBeskrivelse
Linjenr^\d+Start af linje, tal
Varenr(\S+)Gruppe 1: Varenummer (ingen mellemrum)
Beskrivelse(.+?)Gruppe 2: Tekst (lazy match)
Antal([\d.,]+)Gruppe 3: Tal med komma/punktum
Pris([\d.,]+)Gruppe 4: Tal med komma/punktum
Beløb([\d.,]+)Gruppe 5: Tal med komma/punktum
+ +
Komplet pattern eksempel:
+
+ ^\d+(\S+)\s+(.+?)\s+([\d.,]+)([\d.,]+)([\d.,]+)$ +
+ +
Simplere variant (kun varenr og beskrivelse):
+
+ ^\d+(\S+)\s+(.+)$ +
+ + + +
+
+
+ + +
+
+
+
+ + +
+
+
Test & Gem Template
+
+
+
+
+
PDF Preview
+
+ +
+
+
+
+ +
+ + + +
+ + + +
+
+
+
+
+ +
+
+
+ + + + + + diff --git a/app/billing/frontend/templates_list.html b/app/billing/frontend/templates_list.html new file mode 100644 index 0000000..6fb2b0e --- /dev/null +++ b/app/billing/frontend/templates_list.html @@ -0,0 +1,363 @@ + + + + + + Templates - BMC Hub + + + + + + + + + +
+
+
+

Faktura Templates

+

Administrer templates til automatisk faktura-udtrækning

+
+ + Ny Template + +
+ +
+ +
+
+ + + + + + + + + diff --git a/app/billing/frontend/views.py b/app/billing/frontend/views.py new file mode 100644 index 0000000..87c6aaf --- /dev/null +++ b/app/billing/frontend/views.py @@ -0,0 +1,27 @@ +""" +Billing Frontend Views +Serves HTML pages for billing features +""" + +from fastapi import APIRouter +from fastapi.responses import FileResponse + +router = APIRouter() + + +@router.get("/billing/supplier-invoices") +async def supplier_invoices_page(): + """Supplier invoices (kassekladde) page""" + return FileResponse("app/billing/frontend/supplier_invoices.html") + + +@router.get("/billing/template-builder") +async def template_builder_page(): + """Template builder for supplier invoice extraction""" + return FileResponse("app/billing/frontend/template_builder.html") + + +@router.get("/billing/templates") +async def templates_list_page(): + """Templates list and management page""" + return FileResponse("app/billing/frontend/templates_list.html") diff --git a/app/core/config.py b/app/core/config.py index 67fe13e..ec54483 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -33,9 +33,19 @@ class Settings(BaseSettings): ECONOMIC_READ_ONLY: bool = True ECONOMIC_DRY_RUN: bool = True + # Ollama AI Integration + OLLAMA_ENDPOINT: str = "http://ai_direct.cs.blaahund.dk" + OLLAMA_MODEL: str = "qwen2.5:3b" # Hurtigere model til JSON extraction + + # File Upload + UPLOAD_DIR: str = "uploads" + MAX_FILE_SIZE_MB: int = 50 + ALLOWED_EXTENSIONS: List[str] = [".pdf", ".png", ".jpg", ".jpeg", ".txt", ".csv"] + class Config: env_file = ".env" case_sensitive = True + extra = "ignore" # Ignore extra fields from .env settings = Settings() diff --git a/app/services/economic_service.py b/app/services/economic_service.py new file mode 100644 index 0000000..f7cb6b2 --- /dev/null +++ b/app/services/economic_service.py @@ -0,0 +1,608 @@ +""" +e-conomic Integration Service +Send invoices and supplier invoices (kassekladde) to e-conomic accounting system + +🚨 SAFETY MODES: +- 
ECONOMIC_READ_ONLY: Blocks ALL write operations when True +- ECONOMIC_DRY_RUN: Logs operations but doesn't send to e-conomic when True +""" +import logging +import aiohttp +import json +from typing import Dict, Optional, List +from app.core.config import settings + +logger = logging.getLogger(__name__) + + +class EconomicService: + """Service for integrating with e-conomic REST API""" + + def __init__(self): + self.api_url = getattr(settings, 'ECONOMIC_API_URL', 'https://restapi.e-conomic.com') + self.app_secret_token = getattr(settings, 'ECONOMIC_APP_SECRET_TOKEN', None) + self.agreement_grant_token = getattr(settings, 'ECONOMIC_AGREEMENT_GRANT_TOKEN', None) + self.read_only = getattr(settings, 'ECONOMIC_READ_ONLY', True) + self.dry_run = getattr(settings, 'ECONOMIC_DRY_RUN', True) + + if not self.app_secret_token or not self.agreement_grant_token: + logger.warning("⚠️ e-conomic credentials not configured") + + # Log safety status at initialization + if self.read_only: + logger.warning("🔒 e-conomic READ-ONLY MODE ENABLED - All write operations will be blocked") + elif self.dry_run: + logger.warning("🏃 e-conomic DRY-RUN MODE ENABLED - Operations will be logged but not executed") + else: + logger.warning("⚠️ e-conomic WRITE MODE ACTIVE - Changes will be sent to production!") + + def _check_write_permission(self, operation: str) -> bool: + """ + Check if write operations are allowed + + Args: + operation: Name of the operation being attempted + + Returns: + True if operation should proceed, False if blocked + """ + if self.read_only: + logger.error(f"🚫 BLOCKED: {operation} - READ_ONLY mode is enabled") + logger.error("To enable writes, set ECONOMIC_READ_ONLY=false in .env") + return False + + if self.dry_run: + logger.warning(f"🏃 DRY-RUN: {operation} - Would execute but DRY_RUN mode is enabled") + logger.warning("To actually send to e-conomic, set ECONOMIC_DRY_RUN=false in .env") + return False + + # Triple-check for production writes + logger.warning(f"⚠️ EXECUTING 
WRITE OPERATION: {operation}") + logger.warning(f"⚠️ This will modify production e-conomic at {self.api_url}") + return True + + def _log_api_call(self, method: str, endpoint: str, payload: Optional[Dict] = None, + response_data: Optional[Dict] = None, status_code: Optional[int] = None): + """ + Comprehensive logging of all API calls + + Args: + method: HTTP method (GET, POST, etc.) + endpoint: API endpoint + payload: Request payload + response_data: Response data + status_code: HTTP status code + """ + log_entry = { + "method": method, + "endpoint": endpoint, + "api_url": self.api_url, + "read_only": self.read_only, + "dry_run": self.dry_run + } + + if payload: + log_entry["request_payload"] = payload + if response_data: + log_entry["response_data"] = response_data + if status_code: + log_entry["status_code"] = status_code + + logger.info(f"📊 e-conomic API Call: {json.dumps(log_entry, indent=2, default=str)}") + + def _get_headers(self) -> Dict[str, str]: + """Get HTTP headers for e-conomic API""" + if not self.app_secret_token or not self.agreement_grant_token: + raise ValueError("e-conomic credentials not configured") + + return { + 'X-AppSecretToken': self.app_secret_token, + 'X-AgreementGrantToken': self.agreement_grant_token, + 'Content-Type': 'application/json' + } + + async def test_connection(self) -> bool: + """ + Test e-conomic API connection + + Returns: + True if connection successful + """ + try: + async with aiohttp.ClientSession() as session: + async with session.get( + f"{self.api_url}/self", + headers=self._get_headers() + ) as response: + if response.status == 200: + data = await response.json() + logger.info(f"✅ Connected to e-conomic: {data.get('agreementNumber')}") + return True + else: + error = await response.text() + logger.error(f"❌ e-conomic connection failed: {response.status} - {error}") + return False + except Exception as e: + logger.error(f"❌ e-conomic connection error: {e}") + return False + + # ========== SUPPLIER/VENDOR MANAGEMENT 
========== + + async def search_supplier_by_name(self, supplier_name: str) -> Optional[Dict]: + """ + Search for supplier in e-conomic based on name + + Args: + supplier_name: Name of supplier to search for + + Returns: + Supplier data if found, None otherwise + """ + try: + url = f"{self.api_url}/suppliers" + + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=self._get_headers()) as response: + if response.status != 200: + logger.error(f"❌ Failed to fetch suppliers: {response.status}") + return None + + data = await response.json() + suppliers = data.get('collection', []) + + # Search for supplier by name (case-insensitive) + search_name = supplier_name.lower().strip() + + for supplier in suppliers: + supplier_display_name = supplier.get('name', '').lower().strip() + + # Exact match or contains + if search_name in supplier_display_name or supplier_display_name in search_name: + logger.info(f"✅ Found supplier match: {supplier.get('name')} (ID: {supplier.get('supplierNumber')})") + return { + 'supplierNumber': supplier.get('supplierNumber'), + 'name': supplier.get('name'), + 'currency': supplier.get('currency'), + 'vatZone': supplier.get('vatZone') + } + + logger.warning(f"⚠️ No supplier found matching '{supplier_name}'") + return None + + except Exception as e: + logger.error(f"❌ Error searching supplier: {e}") + return None + + async def create_supplier(self, supplier_data: Dict) -> Optional[Dict]: + """ + Create new supplier in e-conomic + + 🚨 WRITE OPERATION - Respects READ_ONLY and DRY_RUN modes + + Args: + supplier_data: { + 'name': str, + 'address': str (optional), + 'city': str (optional), + 'zip': str (optional), + 'country': str (optional), + 'corporate_identification_number': str (optional - CVR), + 'currency': str (default 'DKK'), + 'payment_terms_number': int (default 1), + 'vat_zone_number': int (default 1) + } + + Returns: + Created supplier data with supplierNumber or None if failed + """ + if not 
self._check_write_permission("create_supplier"): + return None + + try: + # Build supplier payload + payload = { + "name": supplier_data['name'], + "currency": supplier_data.get('currency', 'DKK'), + "supplierGroup": { + "supplierGroupNumber": supplier_data.get('supplier_group_number', 1) + }, + "paymentTerms": { + "paymentTermsNumber": supplier_data.get('payment_terms_number', 4) # Netto 14 dage + }, + "vatZone": { + "vatZoneNumber": supplier_data.get('vat_zone_number', 1) + } + } + + # Optional fields + if supplier_data.get('address'): + payload['address'] = supplier_data['address'] + if supplier_data.get('city'): + payload['city'] = supplier_data['city'] + if supplier_data.get('zip'): + payload['zip'] = supplier_data['zip'] + if supplier_data.get('country'): + payload['country'] = supplier_data['country'] + if supplier_data.get('corporate_identification_number'): + payload['corporateIdentificationNumber'] = supplier_data['corporate_identification_number'] + + url = f"{self.api_url}/suppliers" + + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=self._get_headers(), json=payload) as response: + if response.status in [200, 201]: + result = await response.json() + logger.info(f"✅ Created supplier: {result.get('name')} (ID: {result.get('supplierNumber')})") + + # Save to local vendors table + try: + from app.core.database import execute_insert + + vendor_id = execute_insert(""" + INSERT INTO vendors ( + name, + cvr, + economic_supplier_number, + created_at + ) VALUES (%s, %s, %s, CURRENT_TIMESTAMP) + ON CONFLICT (economic_supplier_number) + DO UPDATE SET + name = EXCLUDED.name, + cvr = EXCLUDED.cvr + """, ( + result.get('name'), + supplier_data.get('corporate_identification_number'), + result.get('supplierNumber') + )) + + logger.info(f"✅ Saved supplier to local database (vendor_id: {vendor_id})") + except Exception as db_error: + logger.warning(f"⚠️ Could not save to local database: {db_error}") + + return result + else: + 
error_text = await response.text() + logger.error(f"❌ Failed to create supplier: {response.status} - {error_text}") + return None + + except Exception as e: + logger.error(f"❌ Error creating supplier: {e}") + return None + + # ========== KASSEKLADDE (JOURNALS/VOUCHERS) ========== + + async def get_supplier_invoice_journals(self) -> list: + """ + Get all available journals for supplier invoices (kassekladde) + + Returns: + List of journal dictionaries with journalNumber, name, and journalType + """ + try: + url = f"{self.api_url}/journals" + + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=self._get_headers()) as response: + if response.status != 200: + error_text = await response.text() + raise Exception(f"e-conomic API error: {response.status} - {error_text}") + + data = await response.json() + + # Filter for supplier invoice journals + journals = [] + for journal in data.get('collection', []): + journals.append({ + 'journalNumber': journal.get('journalNumber'), + 'name': journal.get('name'), + 'journalType': journal.get('journalType') + }) + + return journals + except Exception as e: + logger.error(f"❌ Error fetching journals: {e}") + raise + + async def create_journal_supplier_invoice(self, + journal_number: int, + supplier_number: int, + invoice_number: str, + invoice_date: str, + total_amount: float, + vat_breakdown: Dict[str, float], + line_items: List[Dict] = None, + due_date: Optional[str] = None, + text: Optional[str] = None) -> Dict: + """ + Post supplier invoice to e-conomic kassekladde (journals API) + + 🚨 WRITE OPERATION - Respects READ_ONLY and DRY_RUN modes + + Args: + journal_number: Journal/kassekladde number (from system_settings) + supplier_number: e-conomic supplier number + invoice_number: Supplier's invoice number + invoice_date: Invoice date (YYYY-MM-DD) + total_amount: Total invoice amount including VAT + vat_breakdown: Dict of {vat_code: {"net": X, "vat": Y, "gross": Z}} for each VAT group + line_items: 
    async def create_journal_supplier_invoice(self,
                                              journal_number: int,
                                              supplier_number: int,
                                              invoice_number: str,
                                              invoice_date: str,
                                              total_amount: float,
                                              vat_breakdown: Dict[str, float],
                                              line_items: Optional[List[Dict]] = None,
                                              due_date: Optional[str] = None,
                                              text: Optional[str] = None) -> Dict:
        """
        Post supplier invoice to e-conomic kassekladde (journals API).

        🚨 WRITE OPERATION - Respects READ_ONLY and DRY_RUN modes.

        Entries are built from one of three sources, in priority order:
        1. line_items grouped by (vat_code, contra_account),
        2. vat_breakdown (one entry per VAT code),
        3. a single fallback entry for total_amount on account 5810.

        Args:
            journal_number: Journal/kassekladde number (from system_settings)
            supplier_number: e-conomic supplier number
            invoice_number: Supplier's invoice number (truncated to 30 chars)
            invoice_date: Invoice date (YYYY-MM-DD)
            total_amount: Total invoice amount including VAT
            vat_breakdown: Dict of {vat_code: {"net": X, "vat": Y, "gross": Z}} for each VAT group
            line_items: List of line items with contra_account and vat_code
            due_date: Payment due date (YYYY-MM-DD)
            text: Invoice description (truncated to 250 chars)

        Returns:
            Dict with voucher details or error info
        """
        # 🚨 SAFETY CHECK — never touch production unless both flags allow it.
        if not self._check_write_permission("create_journal_supplier_invoice"):
            return {"error": True, "message": "Write operations blocked by READ_ONLY or DRY_RUN mode"}

        try:
            # Extract year from invoice date for accounting year
            # (relies on the documented YYYY-MM-DD input format).
            accounting_year = invoice_date[:4]

            # Build supplier invoice entries - one per line item group or per VAT group.
            supplier_invoices = []

            # If we have line items with contra accounts, use those.
            if line_items and isinstance(line_items, list):
                # Group lines by VAT code and contra account combination so
                # each unique pair produces exactly one journal entry.
                line_groups = {}
                for line in line_items:
                    vat_code = line.get('vat_code', 'I25')
                    contra_account = line.get('contra_account', '5810')
                    key = f"{vat_code}_{contra_account}"

                    if key not in line_groups:
                        line_groups[key] = {
                            'vat_code': vat_code,
                            'contra_account': contra_account,
                            'gross': 0,
                            'vat': 0,
                            'items': []
                        }

                    line_total = line.get('line_total', 0)
                    vat_amount = line.get('vat_amount', 0)

                    line_groups[key]['gross'] += line_total
                    line_groups[key]['vat'] += vat_amount
                    line_groups[key]['items'].append(line)

                # Create one journal entry per group.
                for key, group in line_groups.items():
                    entry = {
                        "supplier": {
                            "supplierNumber": supplier_number
                        },
                        "amount": round(group['gross'], 2),
                        "contraAccount": {
                            "accountNumber": int(group['contra_account'])
                        },
                        "currency": {
                            "code": "DKK"
                        },
                        "date": invoice_date,
                        # e-conomic limits this field; truncate defensively.
                        "supplierInvoiceNumber": invoice_number[:30] if invoice_number else ""
                    }

                    # Add text with (up to two) product descriptions for readability.
                    descriptions = [item.get('description', '') for item in group['items'][:2]]
                    entry_text = text if text else f"Faktura {invoice_number}"
                    if descriptions:
                        entry_text = f"{entry_text} - {', '.join(filter(None, descriptions))}"
                    entry["text"] = entry_text[:250]

                    if due_date:
                        entry["dueDate"] = due_date

                    # Add VAT details only when the group actually carries VAT.
                    if group['vat'] > 0:
                        entry["contraVatAccount"] = {
                            "vatCode": group['vat_code']
                        }
                        entry["contraVatAmount"] = round(group['vat'], 2)

                    supplier_invoices.append(entry)

            elif vat_breakdown and isinstance(vat_breakdown, dict):
                # Fallback: vat_breakdown format:
                # {"I25": {"net": 1110.672, "vat": 277.668, "rate": 25, "gross": 1388.34}, ...}
                for vat_code, vat_data in vat_breakdown.items():
                    if not isinstance(vat_data, dict):
                        continue

                    net_amount = vat_data.get('net', 0)
                    vat_amount = vat_data.get('vat', 0)
                    gross_amount = vat_data.get('gross', net_amount + vat_amount)

                    # Skip empty/negative groups entirely.
                    if gross_amount <= 0:
                        continue

                    entry = {
                        "supplier": {
                            "supplierNumber": supplier_number
                        },
                        "amount": round(gross_amount, 2),
                        "contraAccount": {
                            "accountNumber": 5810  # Default fallback account
                        },
                        "currency": {
                            "code": "DKK"
                        },
                        "date": invoice_date,
                        "supplierInvoiceNumber": invoice_number[:30] if invoice_number else ""
                    }

                    # Add text with VAT code for clarity when several groups exist.
                    entry_text = text if text else f"Faktura {invoice_number}"
                    if len(vat_breakdown) > 1:
                        entry_text = f"{entry_text} ({vat_code})"
                    entry["text"] = entry_text[:250]

                    if due_date:
                        entry["dueDate"] = due_date

                    # Add VAT details
                    if vat_amount > 0:
                        entry["contraVatAccount"] = {
                            "vatCode": vat_code
                        }
                        entry["contraVatAmount"] = round(vat_amount, 2)

                    supplier_invoices.append(entry)
            else:
                # No VAT breakdown - create single entry for the full amount.
                supplier_invoice = {
                    "supplier": {
                        "supplierNumber": supplier_number
                    },
                    "amount": total_amount,
                    "contraAccount": {
                        "accountNumber": 5810  # Default fallback account
                    },
                    "currency": {
                        "code": "DKK"
                    },
                    "date": invoice_date,
                    "supplierInvoiceNumber": invoice_number[:30] if invoice_number else ""
                }

                if text:
                    supplier_invoice["text"] = text[:250]
                if due_date:
                    supplier_invoice["dueDate"] = due_date

                supplier_invoices.append(supplier_invoice)

            # Build voucher payload wrapping all entries in a single voucher.
            payload = {
                "accountingYear": {
                    "year": accounting_year
                },
                "journal": {
                    "journalNumber": journal_number
                },
                "entries": {
                    "supplierInvoices": supplier_invoices
                }
            }

            logger.info(f"📤 Posting supplier invoice to journal {journal_number}")
            logger.debug(f"Payload: {json.dumps(payload, indent=2)}")

            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{self.api_url}/journals/{journal_number}/vouchers",
                    headers=self._get_headers(),
                    json=payload
                ) as response:
                    response_text = await response.text()

                    # NOTE(review): response.json() is awaited here and again
                    # below on success — presumably relying on aiohttp caching
                    # the response body after .text(); confirm this holds for
                    # the aiohttp version in use.
                    self._log_api_call(
                        "POST",
                        f"/journals/{journal_number}/vouchers",
                        payload,
                        await response.json() if response.status in [200, 201] and response_text else None,
                        response.status
                    )

                    if response.status in [200, 201]:
                        data = await response.json() if response_text else {}

                        # e-conomic returns array of created vouchers
                        if isinstance(data, list) and len(data) > 0:
                            voucher_data = data[0]
                        else:
                            voucher_data = data

                        voucher_number = voucher_data.get('voucherNumber')
                        logger.info(f"✅ Supplier invoice posted to kassekladde: voucher #{voucher_number}")
                        return {
                            "success": True,
                            "voucher_number": voucher_number,
                            "journal_number": journal_number,
                            "accounting_year": accounting_year,
                            "data": voucher_data
                        }
                    else:
                        logger.error(f"❌ Post to kassekladde failed: {response.status}")
                        logger.error(f"Response: {response_text}")
                        return {
                            "error": True,
                            "status": response.status,
                            "message": response_text
                        }

        except Exception as e:
            logger.error(f"❌ create_journal_supplier_invoice error: {e}")
            logger.exception("Full traceback:")
            return {"error": True, "status": 500, "message": str(e)}
journal_number: Journal number + accounting_year: Accounting year (e.g., "2025") + voucher_number: Voucher number + pdf_path: Local path to PDF file + filename: Filename for attachment + + Returns: + Dict with success status + """ + # 🚨 SAFETY CHECK + if not self._check_write_permission("upload_voucher_attachment"): + return {"error": True, "message": "Write operations blocked by READ_ONLY or DRY_RUN mode"} + + try: + # Read PDF file + with open(pdf_path, 'rb') as f: + pdf_data = f.read() + + # e-conomic attachment/file endpoint (POST is allowed here, not on /attachment) + url = f"{self.api_url}/journals/{journal_number}/vouchers/{accounting_year}-{voucher_number}/attachment/file" + + headers = { + 'X-AppSecretToken': self.app_secret_token, + 'X-AgreementGrantToken': self.agreement_grant_token + } + + # Use multipart/form-data as required by e-conomic API + form_data = aiohttp.FormData() + form_data.add_field('file', + pdf_data, + filename=filename, + content_type='application/pdf') + + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, data=form_data) as response: + if response.status in [200, 201, 204]: + logger.info(f"📎 PDF attachment uploaded to voucher {accounting_year}-{voucher_number}") + return {"success": True} + else: + error_text = await response.text() + logger.error(f"❌ Failed to upload attachment: {response.status} - {error_text}") + return {"error": True, "status": response.status, "message": error_text} + + except Exception as e: + logger.error(f"❌ upload_voucher_attachment error: {e}") + return {"error": True, "message": str(e)} + + +# Singleton instance +_economic_service_instance = None + +def get_economic_service() -> EconomicService: + """Get singleton instance of EconomicService""" + global _economic_service_instance + if _economic_service_instance is None: + _economic_service_instance = EconomicService() + return _economic_service_instance diff --git a/app/services/ollama_service.py 
"""
Ollama Integration Service for BMC Hub
Handles supplier invoice extraction using Ollama LLM with CVR matching
"""

import json
import hashlib
import logging
from pathlib import Path
from typing import Optional, Dict, List, Tuple
from datetime import datetime
import re

from app.core.config import settings
from app.core.database import execute_insert, execute_query, execute_update

logger = logging.getLogger(__name__)

class OllamaService:
    """Service for extracting supplier invoice data using Ollama LLM.

    Workflow: text is extracted from a PDF (PyPDF2), image (Tesseract OCR)
    or plain-text file, then sent to an Ollama ``/api/generate`` endpoint
    together with a Danish system prompt that instructs the model to answer
    with JSON only. The JSON is parsed (with some lenient repair) and the
    extracted CVR number can be matched against the local ``vendors`` table.
    """

    def __init__(self):
        # Endpoint and model name come from application settings; no network
        # connection is opened at construction time.
        self.endpoint = settings.OLLAMA_ENDPOINT
        self.model = settings.OLLAMA_MODEL
        self.system_prompt = self._build_system_prompt()
        logger.info(f"🤖 Initialized OllamaService: {self.endpoint}, model={self.model}")

    def _build_system_prompt(self) -> str:
        """Build the Danish system prompt for invoice extraction with CVR.

        The returned text is a runtime string (deliberately in Danish, as the
        invoices are Danish); it pins down the exact JSON schema the model
        must return and includes one worked example.
        """
        return """Du er en ekspert i at læse og udtrække strukturerede data fra danske fakturaer og leverandørdokumenter.

VIGTIGE REGLER:
1. Returner KUN gyldig JSON - ingen forklaring eller ekstra tekst
2. Hvis et felt ikke findes, sæt det til null
3. Beregn confidence baseret på hvor sikker du er på hvert felt (0.0-1.0)
4. Datoer skal være i format YYYY-MM-DD
5. Tal skal være decimaler (brug . som decimalseparator)
6. CVR-nummer skal være 8 cifre uden mellemrum
7. Moms/VAT skal udtrækkes fra hver linje hvis muligt

JSON format skal være:
{
    "document_type": "invoice",
    "invoice_number": "fakturanummer",
    "vendor_name": "leverandør firmanavn",
    "vendor_cvr": "12345678",
    "invoice_date": "YYYY-MM-DD",
    "due_date": "YYYY-MM-DD",
    "currency": "DKK",
    "total_amount": 1234.56,
    "vat_amount": 123.45,
    "lines": [
        {
            "line_number": 1,
            "description": "beskrivelse af varen/ydelsen",
            "quantity": antal_som_tal,
            "unit_price": pris_per_stk,
            "line_total": total_for_linjen,
            "vat_rate": 25.00,
            "vat_amount": moms_beløb,
            "confidence": 0.0_til_1.0
        }
    ],
    "confidence": gennemsnits_confidence,
    "raw_text_snippet": "første 200 tegn fra dokumentet"
}

EKSEMPEL:
Input: "FAKTURA 2025-001\\nGlobalConnect A/S\\nCVR: 12345678\\n1 stk Fiber 100/100 Mbit @ 299,00 DKK\\nMoms (25%): 74,75 DKK\\nTotal: 373,75 DKK"

Output: {
    "document_type": "invoice",
    "invoice_number": "2025-001",
    "vendor_name": "GlobalConnect A/S",
    "vendor_cvr": "12345678",
    "total_amount": 373.75,
    "vat_amount": 74.75,
    "lines": [{
        "line_number": 1,
        "description": "Fiber 100/100 Mbit",
        "quantity": 1,
        "unit_price": 299.00,
        "line_total": 299.00,
        "vat_rate": 25.00,
        "vat_amount": 74.75,
        "confidence": 0.95
    }],
    "confidence": 0.95
}"""

    async def extract_from_text(self, text: str) -> Dict:
        """Extract structured invoice data from text using Ollama.

        Args:
            text: Document text content (truncated to 4000 chars before
                being sent to the model).

        Returns:
            Extracted data as a dict (CVR, invoice number, amounts, lines,
            confidence, plus ``_raw_llm_response`` for debugging), or a dict
            with an ``error`` key and ``confidence: 0.0`` on failure.
        """
        # Truncate text if too long (keep first 4000 chars).
        # NOTE(review): "\\n" here is a literal backslash-n in the string, not
        # a newline — confirm this is intentional (same in the prompt below).
        if len(text) > 4000:
            text = text[:4000] + "\\n[... tekst afkortet ...]"

        prompt = f"{self.system_prompt}\\n\\nNU SKAL DU UDTRÆKKE DATA FRA DENNE FAKTURA:\\n{text}\\n\\nReturner kun gyldig JSON:"

        logger.info(f"🤖 Extracting invoice data from text (length: {len(text)})")

        try:
            # Imported lazily so the module loads even when httpx is absent.
            import httpx

            # Very generous timeout (1000 s): local LLMs can be slow on long
            # invoices; the matching error message below references this value.
            async with httpx.AsyncClient(timeout=1000.0) as client:
                response = await client.post(
                    f"{self.endpoint}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "temperature": 0.1,  # low temperature for deterministic extraction
                            "top_p": 0.9,
                            "num_predict": 2000
                        }
                    }
                )

                if response.status_code != 200:
                    raise Exception(f"Ollama returned status {response.status_code}: {response.text}")

                result = response.json()
                # Ollama's non-streaming reply carries the generated text in "response".
                raw_response = result.get("response", "")

                logger.info(f"✅ Ollama extraction completed (response length: {len(raw_response)})")

                # Parse JSON from response
                extraction = self._parse_json_response(raw_response)

                # Add raw response for debugging
                extraction['_raw_llm_response'] = raw_response

                return extraction

        except Exception as e:
            error_msg = f"Ollama extraction failed: {str(e)}"
            logger.error(f"❌ {error_msg}")

            # Classify the failure by substring so the caller gets a
            # user-facing (Danish) message for the common cases.
            error_str = str(e).lower()
            if "timeout" in error_str:
                return {
                    "error": f"Ollama timeout efter 1000 sekunder",
                    "confidence": 0.0
                }
            elif "connection" in error_str or "connect" in error_str:
                return {
                    "error": f"Kan ikke forbinde til Ollama på {self.endpoint}",
                    "confidence": 0.0
                }
            else:
                return {
                    "error": error_msg,
                    "confidence": 0.0
                }

    def _parse_json_response(self, response: str) -> Dict:
        """Parse JSON from an LLM response with improved error handling.

        Tolerates prose around the JSON (takes the span between the first
        '{' and the last '}'), strips trailing commas, and as a last resort
        tries a naive single→double quote replacement.

        Returns the parsed dict, or an error dict with ``confidence: 0.0``.
        """
        try:
            # Find JSON in response (between first { and last })
            start = response.find('{')
            end = response.rfind('}') + 1

            if start >= 0 and end > start:
                json_str = response[start:end]

                # Try to fix common JSON issues
                # Remove trailing commas before } or ]
                json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
                # Fix single quotes to double quotes (but not in values)
                # This is risky, so we only do it if initial parse fails

                try:
                    data = json.loads(json_str)
                    return data
                except json.JSONDecodeError:
                    # Try to fix common issues
                    # Replace single quotes with double quotes (simple approach)
                    # NOTE(review): blanket replacement also rewrites quotes
                    # inside values; acceptable only as a last-ditch repair.
                    fixed_json = json_str.replace("'", '"')
                    try:
                        data = json.loads(fixed_json)
                        logger.warning("⚠️ Fixed JSON with quote replacement")
                        return data
                    except:  # noqa: E722 — NOTE(review): bare except; narrow to json.JSONDecodeError?
                        pass

                    # Last resort: log the problematic JSON, then re-raise the
                    # original JSONDecodeError for the outer handler.
                    logger.error(f"❌ Problematic JSON: {json_str[:300]}")
                    raise
            else:
                raise ValueError("No JSON found in response")

        except json.JSONDecodeError as e:
            # NOTE(review): the ValueError raised above is NOT caught here and
            # propagates to the caller — confirm that is intended.
            logger.error(f"❌ JSON parsing failed: {e}")
            logger.error(f"Raw response preview: {response[:500]}")
            return {
                "error": f"JSON parsing failed: {str(e)}",
                "confidence": 0.0,
                "raw_response": response[:500]
            }

    def calculate_file_checksum(self, file_path: Path) -> str:
        """Calculate SHA256 checksum of a file for duplicate detection.

        Reads in 8 KiB chunks so large uploads don't have to fit in memory.
        """
        sha256 = hashlib.sha256()
        with open(file_path, 'rb') as f:
            while chunk := f.read(8192):
                sha256.update(chunk)
        checksum = sha256.hexdigest()
        logger.info(f"📋 Calculated checksum: {checksum[:16]}... for {file_path.name}")
        return checksum

    async def _extract_text_from_file(self, file_path: Path) -> str:
        """Extract text from a PDF, image, or text file.

        Dispatches on file extension; raises ValueError for unsupported
        types and re-raises any extraction failure after logging it.
        """
        suffix = file_path.suffix.lower()

        try:
            if suffix == '.pdf':
                return await self._extract_text_from_pdf(file_path)
            elif suffix in ['.png', '.jpg', '.jpeg']:
                return await self._extract_text_from_image(file_path)
            elif suffix in ['.txt', '.csv']:
                # errors='ignore' drops undecodable bytes rather than failing.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    return f.read()
            else:
                raise ValueError(f"Unsupported file type: {suffix}")

        except Exception as e:
            logger.error(f"❌ Text extraction failed for {file_path.name}: {e}")
            raise

    async def _extract_text_from_pdf(self, file_path: Path) -> str:
        """Extract text from a PDF using PyPDF2, one page at a time.

        Pages are concatenated with a "--- Side N ---" separator so the LLM
        can tell page boundaries apart.
        """
        try:
            from PyPDF2 import PdfReader

            reader = PdfReader(file_path)
            text = ""

            for page_num, page in enumerate(reader.pages):
                page_text = page.extract_text()
                # NOTE(review): "\\n" is a literal backslash-n here, not a
                # newline — verify the separator renders as intended.
                text += f"\\n--- Side {page_num + 1} ---\\n{page_text}"

            logger.info(f"📄 Extracted {len(text)} chars from PDF with {len(reader.pages)} pages")
            return text

        except Exception as e:
            logger.error(f"❌ PDF extraction failed: {e}")
            raise

    async def _extract_text_from_image(self, file_path: Path) -> str:
        """Extract text from an image using Tesseract OCR (Danish + English).

        Falls back to English-only OCR if the combined pass fails.
        """
        try:
            import pytesseract
            from PIL import Image

            image = Image.open(file_path)

            # Use Danish + English for OCR (language packs installed in the
            # Docker image: tesseract-ocr-dan / tesseract-ocr-eng).
            text = pytesseract.image_to_string(image, lang='dan+eng')

            logger.info(f"🖼️ Extracted {len(text)} chars from image via OCR")
            return text

        except Exception as e:
            logger.error(f"❌ OCR extraction failed: {e}")
            # Fallback to English only.
            # NOTE(review): if the failure above was the `import pytesseract`
            # itself, the names below are unbound and this raises NameError.
            try:
                text = pytesseract.image_to_string(Image.open(file_path), lang='eng')
                logger.warning(f"⚠️ Fallback to English OCR: {len(text)} chars")
                return text
            except:  # noqa: E722 — NOTE(review): bare except; re-raises, but hides the fallback's own error type
                raise

    def _get_mime_type(self, file_path: Path) -> str:
        """Get MIME type from the file extension.

        Falls back to ``application/octet-stream`` for unknown suffixes.
        """
        suffix = file_path.suffix.lower()
        mime_types = {
            '.pdf': 'application/pdf',
            '.png': 'image/png',
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.txt': 'text/plain',
            '.csv': 'text/csv'
        }
        return mime_types.get(suffix, 'application/octet-stream')

    def match_vendor_by_cvr(self, vendor_cvr: Optional[str]) -> Optional[Dict]:
        """Match a vendor from the database using its CVR number.

        Args:
            vendor_cvr: CVR number from extraction (may contain spaces/dashes).

        Returns:
            Vendor row as a dict if found, None otherwise (also None for a
            missing or malformed CVR — Danish CVR numbers are 8 digits).
        """
        if not vendor_cvr:
            return None

        # Clean CVR (remove spaces, dashes — keep digits only)
        cvr_clean = re.sub(r'[^0-9]', '', vendor_cvr)

        if len(cvr_clean) != 8:
            logger.warning(f"⚠️ Invalid CVR format: {vendor_cvr} (cleaned: {cvr_clean})")
            return None

        # Search vendors table (parameterized query — no SQL injection risk)
        vendor = execute_query(
            "SELECT * FROM vendors WHERE cvr = %s",
            (cvr_clean,),
            fetchone=True
        )

        if vendor:
            logger.info(f"✅ Matched vendor: {vendor['name']} (CVR: {cvr_clean})")
            return vendor
        else:
            logger.info(f"⚠️ No vendor found with CVR: {cvr_clean}")
            return None


# Global instance — NOTE(review): constructed at import time; reads settings
# but performs no I/O in __init__, so import stays cheap.
ollama_service = OllamaService()
"""
Supplier Invoice Template Service
Simple template-based invoice field extraction (no AI)
Inspired by OmniSync's invoice template system
"""

import re
import logging
from typing import Dict, List, Optional, Tuple
from datetime import datetime
from pathlib import Path

from app.core.database import execute_query, execute_insert, execute_update

logger = logging.getLogger(__name__)


class TemplateService:
    """Service for template-based invoice extraction.

    Active templates are loaded from the database into an in-memory cache
    keyed by ``template_id``. Each template carries ``detection_patterns``
    (to score how well a PDF matches) and ``field_mappings`` (regexes to
    pull header fields and line items out of the PDF text).
    """

    def __init__(self):
        # template_id -> template row (with joined vendor_name / vendor_cvr)
        self.templates_cache = {}
        self._load_templates()

    def _load_templates(self):
        """Load all active templates into the cache.

        Failures are logged and swallowed so the service still constructs
        (with an empty cache) when the database is unavailable.
        """
        try:
            templates = execute_query(
                """SELECT t.*, v.name as vendor_name, v.cvr as vendor_cvr
                   FROM supplier_invoice_templates t
                   LEFT JOIN vendors v ON t.vendor_id = v.id
                   WHERE t.is_active = TRUE"""
            )

            if templates:
                for template in templates:
                    self.templates_cache[template['template_id']] = template
                logger.info(f"📚 Loaded {len(self.templates_cache)} active templates")
            else:
                logger.warning("⚠️ No templates found")
        except Exception as e:
            logger.error(f"❌ Failed to load templates: {e}")

    def match_template(self, pdf_text: str) -> Tuple[Optional[int], float]:
        """Find the best matching template for the given PDF text.

        Returns:
            (template_id, confidence_score) — template_id is None and the
            score 0.0 when no template scores above zero.
        """
        best_match = None
        best_score = 0.0
        # Matching is case-insensitive: text is lowered once here and the
        # per-template patterns are compared lowered as well.
        pdf_text_lower = pdf_text.lower()

        for template_id, template in self.templates_cache.items():
            score = self._calculate_match_score(pdf_text_lower, template)

            if score > best_score:
                best_score = score
                best_match = template_id

        if best_match:
            logger.info(f"✅ Matched template {best_match} ({self.templates_cache[best_match]['template_name']}) with {best_score:.0%} confidence")

        return best_match, best_score

    def _calculate_match_score(self, pdf_text: str, template: Dict) -> float:
        """Calculate a match score from the template's detection patterns.

        Each pattern contributes its ``weight`` (default 0.5) when it hits;
        the sum is capped at 1.0. ``pdf_text`` is expected pre-lowercased
        by the caller.
        """
        score = 0.0
        patterns = template.get('detection_patterns', [])

        if not patterns:
            return 0.0

        for pattern_obj in patterns:
            pattern_type = pattern_obj.get('type')
            weight = pattern_obj.get('weight', 0.5)

            if pattern_type == 'text':
                # Simple substring search
                pattern = pattern_obj.get('pattern', '').lower()
                if pattern in pdf_text:
                    score += weight

            elif pattern_type == 'cvr':
                # CVR number match (exact substring)
                cvr = str(pattern_obj.get('value', ''))
                if cvr in pdf_text:
                    score += weight  # CVR match is a strong signal

            elif pattern_type == 'regex':
                # Regex pattern match
                pattern = pattern_obj.get('pattern', '')
                if re.search(pattern, pdf_text, re.IGNORECASE):
                    score += weight

        return min(score, 1.0)  # Cap at 100%

    def extract_fields(self, pdf_text: str, template_id: int) -> Dict:
        """Extract header fields using the template's regex patterns.

        Each entry in ``field_mappings`` supplies a ``pattern`` and an
        optional capture ``group`` (default 1); the stripped group text
        becomes the field value. Per-field failures are logged and skipped.
        """
        template = self.templates_cache.get(template_id)
        if not template:
            logger.warning(f"⚠️ Template {template_id} not found in cache")
            return {}

        field_mappings = template.get('field_mappings', {})
        extracted = {}

        for field_name, field_config in field_mappings.items():
            pattern = field_config.get('pattern')
            group = field_config.get('group', 1)

            if not pattern:
                continue

            try:
                match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE)
                if match and len(match.groups()) >= group:
                    value = match.group(group).strip()
                    extracted[field_name] = value
                    logger.debug(f" ✓ {field_name}: {value}")
            except Exception as e:
                logger.warning(f" ✗ Failed to extract {field_name}: {e}")

        return extracted

    def extract_line_items(self, pdf_text: str, template_id: int) -> List[Dict]:
        """Extract invoice line items using the template's line patterns.

        The text is optionally narrowed to the section between the
        ``lines_start`` and ``lines_end`` marker patterns, then the
        ``line_item`` regex is applied; if that yields nothing, a heuristic
        multi-line extraction is tried as a fallback.
        """
        template = self.templates_cache.get(template_id)
        if not template:
            logger.warning(f"⚠️ Template {template_id} not found in cache")
            return []

        field_mappings = template.get('field_mappings', {})

        # Get line extraction config
        lines_start = field_mappings.get('lines_start', {}).get('pattern')
        lines_end = field_mappings.get('lines_end', {}).get('pattern')
        line_pattern = field_mappings.get('line_item', {}).get('pattern')
        line_fields = field_mappings.get('line_item', {}).get('fields', [])

        if not line_pattern:
            logger.debug("No line_item pattern configured")
            return []

        # Extract the section between start and end markers (if configured)
        text_section = pdf_text
        if lines_start:
            try:
                start_match = re.search(lines_start, pdf_text, re.IGNORECASE)
                if start_match:
                    text_section = pdf_text[start_match.end():]
                    logger.debug(f"Found lines_start, section starts at position {start_match.end()}")
            except Exception as e:
                logger.warning(f"Failed to find lines_start: {e}")

        if lines_end:
            try:
                end_match = re.search(lines_end, text_section, re.IGNORECASE)
                if end_match:
                    text_section = text_section[:end_match.start()]
                    logger.debug(f"Found lines_end, section ends at position {end_match.start()}")
            except Exception as e:
                logger.warning(f"Failed to find lines_end: {e}")

        # Try multiple extraction strategies
        lines = self._extract_with_pattern(text_section, line_pattern, line_fields)

        if not lines:
            # Fallback: try smart extraction for common formats
            lines = self._smart_line_extraction(text_section, line_fields)

        logger.info(f"📦 Extracted {len(lines)} line items")
        return lines

    def _extract_with_pattern(self, text: str, pattern: str, field_names: List[str]) -> List[Dict]:
        """Extract lines via the configured regex.

        Capture groups are mapped positionally onto ``field_names``; each
        line dict also gets a 1-based ``line_number`` and the ``raw_text``
        of the full match.
        """
        lines = []
        try:
            for match in re.finditer(pattern, text, re.MULTILINE):
                line_data = {
                    'line_number': len(lines) + 1,
                    'raw_text': match.group(0)
                }

                # Map captured groups to field names
                for idx, field_name in enumerate(field_names, start=1):
                    if idx <= len(match.groups()):
                        line_data[field_name] = match.group(idx).strip()

                lines.append(line_data)
        except Exception as e:
            logger.error(f"❌ Pattern extraction failed: {e}")

        return lines

    def _smart_line_extraction(self, text: str, field_names: List[str]) -> List[Dict]:
        """
        Multi-line extraction for ALSO-style invoices.

        Format:
        100 48023976 REFURB LENOVO ThinkPad P15 G1 Grde A
        ...metadata lines...
        1ST 3.708,27 3.708,27

        Combines data from the description line + price line.

        NOTE(review): ``field_names`` is accepted but unused here — confirm
        whether template field names should influence the output keys.
        """
        lines_arr = text.split('\n')
        items = []
        i = 0

        while i < len(lines_arr):
            line = lines_arr[i].strip()

            # Find the position + item-number + description line, e.g.
            # "100 48023976 REFURB LENOVO ThinkPad P15 G1 Grde A"
            item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line)
            if item_match:
                position = item_match.group(1)
                item_number = item_match.group(2)
                description = item_match.group(3).strip()

                # Skip if this is a header row
                if re.search(r'(Position|Varenr|Beskrivelse|Antal|Pris|Total)', line, re.IGNORECASE):
                    i += 1
                    continue

                # Find the following quantity+price line (within the next 10 lines)
                quantity = None
                unit_price = None
                total_price = None

                for j in range(i+1, min(i+10, len(lines_arr))):
                    price_line = lines_arr[j].strip()
                    # Match: "1ST 3.708,27 3.708,27"
                    price_match = re.match(r'^(\d+)\s*(?:ST|stk|pc|pcs)\s+([\d.,]+)\s+([\d.,]+)', price_line, re.IGNORECASE)
                    if price_match:
                        quantity = price_match.group(1)
                        # NOTE(review): only ',' is replaced — a Danish amount
                        # like "3.708,27" becomes "3.708.27" (thousands dot
                        # kept), which is not a parseable number. Verify the
                        # downstream consumer normalizes this.
                        unit_price = price_match.group(2).replace(',', '.')
                        total_price = price_match.group(3).replace(',', '.')
                        break

                # Only add the item if prices were found
                if quantity and unit_price:
                    items.append({
                        'line_number': len(items) + 1,
                        'position': position,
                        'item_number': item_number,
                        'description': description,
                        'quantity': quantity,
                        'unit_price': unit_price,
                        'total_price': total_price,
                        'raw_text': f"{line} ... {quantity}ST {unit_price} {total_price}"
                    })
                    logger.info(f"✅ Multi-line item: {item_number} - {description[:30]}... ({quantity}ST @ {unit_price})")

            i += 1

        if items:
            logger.info(f"📦 Multi-line extraction found {len(items)} items")
        else:
            logger.warning("⚠️ Multi-line extraction found no items")

        return items

    def log_usage(self, template_id: int, file_id: int, matched: bool,
                  confidence: float, fields: Dict):
        """Log template usage for statistics.

        Best-effort: failures are logged and swallowed so bookkeeping never
        breaks the extraction path. On a match, the template's usage/success
        counters and last-used timestamp are bumped.
        """
        try:
            # NOTE(review): ``fields`` (a dict) is passed raw as a SQL
            # parameter — presumably the DB layer adapts it to JSON; confirm.
            execute_insert(
                """INSERT INTO template_usage_log
                   (template_id, file_id, matched, confidence, fields_extracted)
                   VALUES (%s, %s, %s, %s, %s)""",
                (template_id, file_id, matched, confidence, fields)
            )

            if matched:
                # Update template stats
                execute_update(
                    """UPDATE supplier_invoice_templates
                       SET usage_count = usage_count + 1,
                           success_count = success_count + 1,
                           last_used_at = CURRENT_TIMESTAMP
                       WHERE template_id = %s""",
                    (template_id,)
                )
        except Exception as e:
            logger.error(f"❌ Failed to log template usage: {e}")

    def get_vendor_templates(self, vendor_id: int) -> List[Dict]:
        """Get all active templates for a vendor, most-used first."""
        return execute_query(
            """SELECT * FROM supplier_invoice_templates
               WHERE vendor_id = %s AND is_active = TRUE
               ORDER BY usage_count DESC""",
            (vendor_id,),
            fetchall=True
        )

    def reload_templates(self):
        """Reload templates from the database, discarding the cache."""
        self.templates_cache = {}
        self._load_templates()


# Global instance — NOTE(review): constructed at import time; __init__ hits
# the database via _load_templates (errors are swallowed, cache stays empty).
template_service = TemplateService()