From 890bd6245d62edc448a37596923da0102c25e9b9 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 8 Dec 2025 23:46:18 +0100 Subject: [PATCH] feat: Add template editing functionality and improve file loading logic - Added an "Edit" button for templates in the templates list, redirecting to the template builder. - Enhanced loadPendingFiles function to filter files by vendor ID, displaying a message if no files are found. - Modified openTestModal to load vendor-specific files based on the selected template. - Updated Ollama model configuration for improved JSON extraction. - Refactored Ollama service to support different API formats based on model type. - Implemented lazy loading of templates in TemplateService for better performance. - Added VAT note extraction for invoice line items. - Updated Docker Compose configuration for Ollama model settings. --- .env.bak | 53 ++ app/billing/backend/supplier_invoices.py | 355 +++++------- app/billing/frontend/supplier_invoices.html | 69 ++- app/billing/frontend/template_builder.html | 611 +++++++++++++++++--- app/billing/frontend/templates_list.html | 39 +- app/core/config.py | 2 +- app/services/ollama_service.py | 89 ++- app/services/template_service.py | 50 +- docker-compose.yml | 2 + docker-compose.yml.bak2 | 71 +++ docker-compose.yml.bak3 | 71 +++ 11 files changed, 1078 insertions(+), 334 deletions(-) create mode 100644 .env.bak create mode 100644 docker-compose.yml.bak2 create mode 100644 docker-compose.yml.bak3 diff --git a/.env.bak b/.env.bak new file mode 100644 index 0000000..0d915ad --- /dev/null +++ b/.env.bak @@ -0,0 +1,53 @@ +# ===================================================== +# POSTGRESQL DATABASE - Local Development +# ===================================================== +DATABASE_URL=postgresql://bmc_hub:bmc_hub@postgres:5432/bmc_hub + +# Database credentials (bruges af docker-compose) +POSTGRES_USER=bmc_hub +POSTGRES_PASSWORD=bmc_hub +POSTGRES_DB=bmc_hub +POSTGRES_PORT=5433 + +# ===================================================== +# API CONFIGURATION +# ===================================================== +API_HOST=0.0.0.0 +API_PORT=8001 +API_RELOAD=true + +# ===================================================== +# SECURITY +# ===================================================== +SECRET_KEY=change-this-in-production-use-random-string +CORS_ORIGINS=http://localhost:8000,http://localhost:3000 + +# ===================================================== +# LOGGING +# ===================================================== +LOG_LEVEL=INFO +LOG_FILE=logs/app.log + +# ===================================================== +# GITHUB/GITEA REPOSITORY (Optional - for reference) +# ===================================================== +# Repository: https://g.bmcnetworks.dk/ct/bmc_hub +GITHUB_REPO=ct/bmc_hub + +# ===================================================== +# OLLAMA AI INTEGRATION +# ===================================================== +OLLAMA_ENDPOINT=http://ai_direct.cs.blaahund.dk +OLLAMA_MODEL=qwen2.5:3b + +# ===================================================== +# e-conomic Integration (Optional) +# ===================================================== +# Get credentials from e-conomic Settings -> Integrations -> API +ECONOMIC_API_URL=https://restapi.e-conomic.com +ECONOMIC_APP_SECRET_TOKEN=your_app_secret_token_here +ECONOMIC_AGREEMENT_GRANT_TOKEN=your_agreement_grant_token_here + +# 🚨 SAFETY SWITCHES - Beskytter mod utilsigtede ændringer +ECONOMIC_READ_ONLY=true # Set to false ONLY after testing +ECONOMIC_DRY_RUN=true # Set to false ONLY when ready for production writes diff --git a/app/billing/backend/supplier_invoices.py b/app/billing/backend/supplier_invoices.py index a7d0ed1..e4a4b25 100644 --- a/app/billing/backend/supplier_invoices.py +++ b/app/billing/backend/supplier_invoices.py @@ -281,6 +281,16 @@ async def get_file_extracted_data(file_id: int): fetchone=True ) + # Parse llm_response_json if it exists (from AI or template extraction) + llm_json_data = None + if extraction and extraction.get('llm_response_json'): + import json + try: + llm_json_data = json.loads(extraction['llm_response_json']) if isinstance(extraction['llm_response_json'], str) else extraction['llm_response_json'] + logger.info(f"📊 Parsed llm_response_json: invoice_number={llm_json_data.get('invoice_number')}") + except Exception as e: + logger.warning(f"⚠️ Failed to parse llm_response_json: {e}") + # Get extraction lines if exist extraction_lines = [] if extraction: @@ -299,11 +309,65 @@ async def get_file_extracted_data(file_id: int): if file_path.exists(): pdf_text = await ollama_service._extract_text_from_file(file_path) + # Format line items for frontend + formatted_lines = [] + if extraction_lines: + for line in extraction_lines: + formatted_lines.append({ + "description": line.get('description'), + "quantity": float(line.get('quantity')) if line.get('quantity') else None, + "unit_price": float(line.get('unit_price')) if line.get('unit_price') else None, + "vat_rate": float(line.get('vat_rate')) if line.get('vat_rate') else None, + "line_total": float(line.get('line_total')) if line.get('line_total') else None, + "vat_note": line.get('vat_note') + }) + elif llm_json_data and llm_json_data.get('lines'): + # Use lines from LLM JSON response + for line in llm_json_data['lines']: + formatted_lines.append({ + "description": line.get('description'), + "quantity": float(line.get('quantity')) if line.get('quantity') else None, + "unit_price": float(line.get('unit_price')) if line.get('unit_price') else None, + "vat_rate": float(line.get('vat_rate')) if line.get('vat_rate') else None, + "line_total": float(line.get('line_total')) if line.get('line_total') else None, + "vat_note": line.get('vat_note') + }) + + # Build llm_data response + llm_data = None + if llm_json_data: + # Use invoice_number from LLM JSON (works for both AI and template extraction) + llm_data = { + "invoice_number": llm_json_data.get('invoice_number'), + "invoice_date": llm_json_data.get('invoice_date'), + "due_date": llm_json_data.get('due_date'), + "total_amount": float(llm_json_data.get('total_amount')) if llm_json_data.get('total_amount') else None, + "currency": llm_json_data.get('currency') or 'DKK', + "document_type": llm_json_data.get('document_type'), + "lines": formatted_lines + } + elif extraction: + # Fallback to extraction table columns if no LLM JSON + llm_data = { + "invoice_number": extraction.get('document_id'), + "invoice_date": extraction.get('document_date').isoformat() if extraction.get('document_date') else None, + "due_date": extraction.get('due_date').isoformat() if extraction.get('due_date') else None, + "total_amount": float(extraction.get('total_amount')) if extraction.get('total_amount') else None, + "currency": extraction.get('currency') or 'DKK', + "document_type": extraction.get('document_type'), + "lines": formatted_lines + } + + # Get vendor from extraction + vendor_matched_id = extraction.get('vendor_matched_id') if extraction else None + return { "file_id": file_id, "filename": file_info['filename'], "status": file_info['status'], "uploaded_at": file_info['uploaded_at'], + "vendor_matched_id": vendor_matched_id, + "llm_data": llm_data, "extraction": extraction, "extraction_lines": extraction_lines if extraction_lines else [], "pdf_text_preview": pdf_text[:5000] if pdf_text else None @@ -351,6 +415,12 @@ async def download_pending_file(file_id: int): raise HTTPException(status_code=500, detail=str(e)) +@router.get("/supplier-invoices/files/{file_id}/pdf") +async def get_file_pdf(file_id: int): + """Get PDF file for viewing (alias for download endpoint)""" + return await download_pending_file(file_id) + + @router.post("/supplier-invoices/files/{file_id}/link-vendor") async def link_vendor_to_extraction(file_id: int, data: dict): """Link an existing vendor to the extraction""" @@ -705,6 +775,29 @@ async def list_templates(): raise HTTPException(status_code=500, detail=str(e)) +@router.get("/supplier-invoices/templates/{template_id}") +async def get_template(template_id: int): + """Hent et specifikt template med vendor info""" + try: + query = """ + SELECT t.*, v.name as vendor_name, v.cvr_number as vendor_cvr + FROM supplier_invoice_templates t + LEFT JOIN vendors v ON t.vendor_id = v.id + WHERE t.template_id = %s AND t.is_active = true + """ + template = execute_query(query, (template_id,), fetchone=True) + + if not template: + raise HTTPException(status_code=404, detail="Template not found") + + return template + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to get template {template_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + @router.post("/supplier-invoices/search-vendor") async def search_vendor_by_info(request: Dict): """ @@ -1023,6 +1116,9 @@ async def create_supplier_invoice(data: Dict): # Insert lines if provided if data.get('lines'): for idx, line in enumerate(data['lines'], start=1): + # Map vat_code: I52 for reverse charge, I25 for standard + vat_code = line.get('vat_code', 'I25') + execute_insert( """INSERT INTO supplier_invoice_lines (supplier_invoice_id, line_number, description, quantity, unit_price, @@ -1035,7 +1131,7 @@ async def create_supplier_invoice(data: Dict): line.get('quantity', 1), line.get('unit_price', 0), line.get('line_total', 0), - line.get('vat_code', 'I25'), + vat_code, line.get('vat_rate', 25.00), line.get('vat_amount', 0), line.get('contra_account', '5810'), @@ -1582,11 +1678,34 @@ async def upload_supplier_invoice(file: UploadFile = File(...)): import json extraction_id = execute_insert( """INSERT INTO extractions - (file_id, template_id, extraction_method, raw_data, extracted_at) - VALUES (%s, %s, %s, %s, CURRENT_TIMESTAMP)""", - (file_id, template_id, 'template', json.dumps(extracted_fields)) + (file_id, vendor_matched_id, document_id, document_date, due_date, + total_amount, currency, document_type, confidence, llm_response_json, status) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'extracted')""", + (file_id, vendor_id, + extracted_fields.get('invoice_number'), + extracted_fields.get('invoice_date'), + extracted_fields.get('due_date'), + extracted_fields.get('total_amount'), + extracted_fields.get('currency', 'DKK'), + extracted_fields.get('document_type'), + confidence, + json.dumps(extracted_fields)) ) + # Insert line items if extracted + if extracted_fields.get('lines'): + for idx, line in enumerate(extracted_fields['lines'], start=1): + execute_insert( + """INSERT INTO extraction_lines + (extraction_id, line_number, description, quantity, unit_price, + line_total, vat_rate, vat_note, confidence) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)""", + (extraction_id, idx, line.get('description'), + line.get('quantity'), line.get('unit_price'), + line.get('line_total'), line.get('vat_rate'), + line.get('vat_note'), confidence) + ) + # Log usage template_service.log_usage(template_id, file_id, True, confidence, extracted_fields) @@ -1598,86 +1717,22 @@ async def upload_supplier_invoice(file: UploadFile = File(...)): (template_id, file_id) ) else: - # FALLBACK: Use AI to extract data universally - logger.info("🤖 No template matched - using AI universal extraction...") + # NO AI FALLBACK - Require template + logger.warning(f"⚠️ No template matched (confidence: {confidence:.0%}) - rejecting file") - try: - # Build AI prompt for universal extraction - ai_prompt = f"""OPGAVE: Analyser denne danske faktura og udtræk nøgledata. - -RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON! - -REQUIRED STRUKTUR: -{{ - "invoice_number": "5082481", - "invoice_date": "2025-10-24", - "due_date": "2025-11-24", - "total_amount": "1471.20", - "currency": "DKK", - "vendor_name": "DCS ApS", - "vendor_cvr": "29522790", - "vendor_address": "Høgemosevænget 89, 2820 Gentofte", - "line_items": [ - {{"description": "Ubiquiti Switch", "quantity": 1, "unit_price": "619.00", "total": "619.00"}} - ] -}} - -VIGTIGT: -- Dato format: YYYY-MM-DD -- Ignorer CVR {settings.OWN_CVR} (det er KØBERS CVR - find LEVERANDØRENS CVR) -- currency: Normalt "DKK" for danske fakturaer -- line_items: Udtræk så mange linjer som muligt -- Hvis et felt ikke kan findes, brug null - -PDF TEKST: -{text[:3000]} - -RETURNER KUN JSON!""" - - # Call AI - ai_result = await ollama_service.extract_from_text(ai_prompt) - - if ai_result and ai_result.get('vendor_cvr'): - # Try to find existing vendor by CVR - vendor = execute_query( - "SELECT id, name FROM vendors WHERE cvr_number = %s", - (ai_result['vendor_cvr'],), - fetchone=True - ) - - if vendor: - vendor_id = vendor['id'] - logger.info(f"✅ AI matched vendor: {vendor['name']} (CVR: {ai_result['vendor_cvr']})") - else: - logger.info(f"ℹ️ AI found unknown vendor CVR: {ai_result['vendor_cvr']}") - - extracted_fields = ai_result - - # Save extraction to database - import json - extraction_id = execute_insert( - """INSERT INTO extractions - (file_id, extraction_method, raw_data, extracted_at) - VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""", - (file_id, 'ai_universal', json.dumps(ai_result)) - ) - - execute_update( - """UPDATE incoming_files - SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP - WHERE file_id = %s""", - (file_id,) - ) - - except Exception as ai_error: - logger.warning(f"⚠️ AI extraction failed: {ai_error} - manual entry required") - execute_update( - """UPDATE incoming_files - SET status = 'pending', processed_at = CURRENT_TIMESTAMP - WHERE file_id = %s""", - (file_id,) - ) - + execute_update( + """UPDATE incoming_files + SET status = 'failed', + error_message = 'Ingen template match - opret template for denne leverandør', + processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (file_id,) + ) + + raise HTTPException( + status_code=400, + detail=f"Ingen template match ({confidence:.0%} confidence) - opret template for denne leverandør" + ) # Return data for user to review and confirm return { @@ -1767,136 +1822,26 @@ async def reprocess_uploaded_file(file_id: int): (template_id, file_id) ) else: - logger.info("🤖 Ingen template match - bruger AI udtrækning med forbedret system prompt") - - # Use improved Ollama service with credit note detection - ai_result = await ollama_service.extract_from_text(text) - - if not ai_result or 'error' in ai_result: - execute_update( - """UPDATE incoming_files - SET status = 'failed', error_message = 'AI udtrækning returnerede ingen data', - processed_at = CURRENT_TIMESTAMP - WHERE file_id = %s""", - (file_id,) - ) - return { - "status": "failed", - "file_id": file_id, - "error": "AI udtrækning fejlede" - } - - # Search for vendor by CVR (normalize: remove DK prefix) - vendor_cvr = ai_result.get('vendor_cvr', '').replace('DK', '').replace('dk', '').strip() - vendor_id = None - - # CRITICAL: If AI mistakenly identified our own company as vendor, reject it - if vendor_cvr == settings.OWN_CVR: - logger.warning(f"⚠️ AI wrongly identified BMC Denmark (CVR {settings.OWN_CVR}) as vendor - this is the customer!") - vendor_cvr = None - ai_result['vendor_cvr'] = None - ai_result['vendor_name'] = None - - if vendor_cvr: - vendor = execute_query( - "SELECT id, name FROM vendors WHERE cvr_number = %s", - (vendor_cvr,), - fetchone=True - ) - if vendor: - vendor_id = vendor['id'] - logger.info(f"✅ Matched vendor: {vendor['name']} (CVR: {vendor_cvr})") - else: - logger.warning(f"⚠️ Vendor not found for CVR: {vendor_cvr}") - - # Extract dates from raw text if AI didn't provide them - invoice_date = ai_result.get('invoice_date') - due_date = ai_result.get('due_date') - - # Validate and clean dates - if invoice_date == '': - invoice_date = None - if due_date == '' or not due_date: - # If no due date, default to 30 days after invoice date - if invoice_date: - from datetime import datetime, timedelta - try: - inv_date_obj = datetime.strptime(invoice_date, '%Y-%m-%d') - due_date_obj = inv_date_obj + timedelta(days=30) - due_date = due_date_obj.strftime('%Y-%m-%d') - logger.info(f"📅 Calculated due_date: {due_date} (invoice_date + 30 days)") - except: - due_date = None - else: - due_date = None - - if not invoice_date and 'raw_text_snippet' in ai_result: - # Try to find date in format "Dato: DD.MM.YYYY" - import re - from datetime import datetime - date_match = re.search(r'Dato:\s*(\d{2})\.(\d{2})\.(\d{4})', ai_result['raw_text_snippet']) - if date_match: - day, month, year = date_match.groups() - invoice_date = f"{year}-{month}-{day}" - logger.info(f"📅 Extracted invoice_date from text: {invoice_date}") - - # Normalize line items (AI might return 'lines' or 'line_items') - line_items = ai_result.get('line_items') or ai_result.get('lines') or [] - - # Use matched vendor name if found, otherwise use AI's name - vendor_name = ai_result.get('vendor_name') - if vendor_id and vendor: - vendor_name = vendor['name'] # Override with actual vendor name from database - logger.info(f"✅ Using matched vendor name: {vendor_name}") - - # Save extraction to database with document_type_detected - document_type = ai_result.get('document_type', 'invoice') - extraction_id = execute_insert( - """INSERT INTO extractions ( - file_id, vendor_matched_id, llm_response_json, - vendor_name, vendor_cvr, document_date, due_date, - total_amount, currency, confidence, status, document_type_detected - ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'extracted', %s) - RETURNING extraction_id""", - ( - file_id, vendor_id, json.dumps(ai_result), - vendor_name, vendor_cvr, # Use corrected vendor name - invoice_date, due_date, # Use extracted dates - ai_result.get('total_amount'), ai_result.get('currency', 'DKK'), - ai_result.get('confidence', 0.8), - document_type # Store detected document type (invoice or credit_note) - ) - ) - - # Save line items (handle both 'lines' and 'line_items') - if line_items: - for idx, line in enumerate(line_items, 1): - execute_update( - """INSERT INTO extraction_lines ( - extraction_id, line_number, description, quantity, - unit_price, line_total, vat_rate, vat_amount, confidence - ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)""", - ( - extraction_id, idx, - line.get('description'), - line.get('quantity'), - line.get('unit_price'), - line.get('total_price') or line.get('line_total'), - line.get('vat_rate'), - line.get('vat_amount'), - line.get('confidence', 0.8) - ) - ) + # NO AI FALLBACK - Require template matching + logger.warning(f"⚠️ Ingen template match (confidence: {confidence:.0%}) - afviser fil") execute_update( """UPDATE incoming_files - SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP, error_message = NULL + SET status = 'failed', + error_message = 'Ingen template match - opret template for denne leverandør', + processed_at = CURRENT_TIMESTAMP WHERE file_id = %s""", (file_id,) ) - extracted_fields = ai_result + return { + "status": "failed", + "file_id": file_id, + "error": "Ingen template match - opret template for denne leverandør", + "confidence": confidence + } + # Return success with template data return { "status": "success", "file_id": file_id, diff --git a/app/billing/frontend/supplier_invoices.html b/app/billing/frontend/supplier_invoices.html index 6e3d5cc..0bcf4cf 100644 --- a/app/billing/frontend/supplier_invoices.html +++ b/app/billing/frontend/supplier_invoices.html @@ -480,8 +480,8 @@
PDF Dokument
-
- +
+
@@ -544,6 +544,11 @@
+
+ 💡 Momskoder:
+ I25 Standard 25% moms (køb med moms)
+ I52 Omvendt betalingspligt (ingen moms - auto-detekteres fra "⚠️ OMVENDT BETALINGSPLIGT") +
@@ -1390,16 +1395,29 @@ async function openManualEntryMode() { return; } + // Set file ID + document.getElementById('manualEntryFileId').value = fileId; + + // Clear form + document.getElementById('manualEntryForm').reset(); + document.getElementById('manualLineItems').innerHTML = ''; + manualLineCounter = 0; + // Close review modal - const reviewModal = bootstrap.Modal.getInstance(document.getElementById('reviewModal')); + const reviewModal = bootstrap.Modal.getInstance(document.getElementById('reviewExtractedDataModal')); if (reviewModal) { reviewModal.hide(); } - // Set file ID - document.getElementById('manualEntryFileId').value = fileId; + // Open manual entry modal first + console.log('Opening manual entry modal...'); + const manualModal = new bootstrap.Modal(document.getElementById('manualEntryModal')); + manualModal.show(); - // Load PDF + // Wait a bit for modal to render + await new Promise(resolve => setTimeout(resolve, 300)); + + // Load PDF after modal is open console.log('Loading PDF...'); document.getElementById('manualEntryPdfViewer').src = `/api/v1/supplier-invoices/files/${fileId}/pdf`; @@ -1407,11 +1425,6 @@ async function openManualEntryMode() { console.log('Loading vendors...'); await loadVendorsForManual(); - // Clear form - document.getElementById('manualEntryForm').reset(); - document.getElementById('manualLineItems').innerHTML = ''; - manualLineCounter = 0; - // Load extracted data and prefill form console.log('Loading extracted data...'); try { @@ -1423,10 +1436,14 @@ async function openManualEntryMode() { // Prefill form fields if (data.llm_data) { const llm = data.llm_data; + console.log('LLM data invoice_number:', llm.invoice_number); // Invoice number if (llm.invoice_number) { + console.log('Setting invoice number:', llm.invoice_number); document.getElementById('manualInvoiceNumber').value = llm.invoice_number; + } else { + console.warn('No invoice_number in llm_data'); } // Invoice date @@ -1456,7 +1473,7 @@ async function openManualEntryMode() { // Vendor - select if matched if (data.vendor_matched_id) { - document.getElementById('manualVendorSelect').value = data.vendor_matched_id; + document.getElementById('manualVendorId').value = data.vendor_matched_id; } // Add line items @@ -1466,7 +1483,14 @@ async function openManualEntryMode() { const lineNum = manualLineCounter; if (line.description) { - document.getElementById(`manualLineDesc${lineNum}`).value = line.description; + let desc = line.description; + // Add VAT note to description if present + if (line.vat_note === 'reverse_charge') { + desc += ' ⚠️ OMVENDT BETALINGSPLIGT'; + } else if (line.vat_note === 'copydan_included') { + desc += ' [Copydan incl.]'; + } + document.getElementById(`manualLineDesc${lineNum}`).value = desc; } if (line.quantity) { document.getElementById(`manualLineQty${lineNum}`).value = line.quantity; @@ -1498,10 +1522,6 @@ async function openManualEntryMode() { addManualLine(); } - // Open modal - console.log('Opening manual entry modal...'); - const manualModal = new bootstrap.Modal(document.getElementById('manualEntryModal')); - manualModal.show(); console.log('Manual entry modal opened successfully'); } catch (error) { @@ -1618,19 +1638,30 @@ async function saveManualInvoice() { const descriptions = document.getElementsByName('line_description[]'); const quantities = document.getElementsByName('line_quantity[]'); const prices = document.getElementsByName('line_price[]'); + const vatRates = document.getElementsByName('line_vat[]'); const lines = []; for (let i = 0; i < descriptions.length; i++) { if (descriptions[i].value.trim()) { + const desc = descriptions[i].value; const qty = parseFloat(quantities[i].value) || 1; const price = parseFloat(prices[i].value) || 0; + const vatRate = parseFloat(vatRates[i].value) || 25.00; + + // Detect VAT code from description + let vatCode = 'I25'; // Default: 25% input VAT + if (desc.includes('OMVENDT BETALINGSPLIGT') || desc.includes('⚠️ OMVENDT BETALINGSPLIGT')) { + vatCode = 'I52'; // Reverse charge - no VAT + } + lines.push({ line_number: i + 1, - description: descriptions[i].value, + description: desc, quantity: qty, unit_price: price, line_total: qty * price, - vat_rate: 25.00 + vat_rate: vatRate, + vat_code: vatCode }); } } diff --git a/app/billing/frontend/template_builder.html b/app/billing/frontend/template_builder.html index 0052cec..abfaf0c 100644 --- a/app/billing/frontend/template_builder.html +++ b/app/billing/frontend/template_builder.html @@ -173,6 +173,15 @@
PDF Tekst Preview
+ + + +
Sådan gør du:
@@ -440,16 +449,237 @@ let pdfText = ''; let selectedText = ''; let detectionPatterns = []; let fieldPatterns = {}; +let editingTemplateId = null; // Track if we're editing // Load pending files on page load document.addEventListener('DOMContentLoaded', async () => { - await loadPendingFiles(); - await loadVendors(); + // Check if we're editing an existing template + const urlParams = new URLSearchParams(window.location.search); + editingTemplateId = urlParams.get('id'); + + if (editingTemplateId) { + await loadExistingTemplate(editingTemplateId); + } else { + await loadPendingFiles(); + await loadVendors(); + } }); +async function loadExistingTemplate(templateId) { + try { + console.log('Loading template:', templateId); + + // Load template data + const response = await fetch(`/api/v1/supplier-invoices/templates/${templateId}`); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + const template = await response.json(); + console.log('Template loaded:', template); + + if (!template) { + console.error('Template not found:', templateId); + alert('Template ikke fundet'); + window.location.href = '/billing/templates'; + return; + } + + console.log('Template found:', template.template_name); + + // Update page title + document.querySelector('h1').innerHTML = `Rediger Template: ${template.template_name}`; + + // Populate template data + document.getElementById('templateName').value = template.template_name; + + // Load vendors and select the correct one + await loadVendors(); + document.getElementById('vendorSelect').value = template.vendor_id; + + // Load detection patterns + detectionPatterns = template.detection_patterns || []; + + // Load field patterns + fieldPatterns = template.field_mappings || {}; + + // Skip to step 3 (patterns) - show the step first + document.getElementById('stepContent1').classList.add('d-none'); + document.getElementById('stepContent2').classList.add('d-none'); + document.getElementById('stepContent3').classList.remove('d-none'); + + document.getElementById('step1').classList.remove('active'); + document.getElementById('step2').classList.remove('active'); + document.getElementById('step3').classList.add('active'); + + // Wait a bit for DOM to be ready, then populate fields + setTimeout(() => { + renderFieldPatterns(); + }, 100); + + // Load files from this vendor to show PDF preview + const filesResponse = await fetch('/api/v1/pending-supplier-invoice-files'); + const filesData = await filesResponse.json(); + + // Filter files by vendor + const vendorFiles = filesData.files.filter(f => + f.vendor_matched_id == template.vendor_id || + f.vendor_name == template.vendor_name + ); + + console.log(`Found ${vendorFiles.length} files for vendor ${template.vendor_name}`); + + // Show file selector in edit mode and populate it + const fileSelector = document.getElementById('editFileSelector'); + const fileSelect = document.getElementById('editFileSelect'); + if (fileSelector && fileSelect) { + fileSelector.style.display = 'block'; + fileSelect.innerHTML = ''; + + vendorFiles.forEach(file => { + const option = document.createElement('option'); + option.value = file.file_id; + option.textContent = `${file.filename} (${file.status})`; + fileSelect.appendChild(option); + }); + + // If no vendor files, show all files + if (vendorFiles.length === 0) { + filesData.files.forEach(file => { + const option = document.createElement('option'); + option.value = file.file_id; + option.textContent = `${file.filename} - ${file.vendor_name || 'Ukendt'} (${file.status})`; + fileSelect.appendChild(option); + }); + } + } + + if (vendorFiles.length > 0) { + const firstFile = vendorFiles[0]; + if (fileSelect) { + fileSelect.value = firstFile.file_id; + } + const fileResponse = await fetch(`/api/v1/supplier-invoices/reprocess/${firstFile.file_id}`, { + method: 'POST' + }); + const fileData = await fileResponse.json(); + selectedText = fileData.pdf_text || ''; + pdfText = selectedText; // Set pdfText for pattern testing + const pdfPreview = document.getElementById('pdfPreview'); + if (pdfPreview) { + pdfPreview.textContent = selectedText; + } + } else { + console.log('No files found for this vendor - loading any file'); + // Fallback to any file if no vendor-specific files + if (filesData.files.length > 0) { + const firstFile = filesData.files[0]; + if (fileSelect) { + fileSelect.value = firstFile.file_id; + } + const fileResponse = await fetch(`/api/v1/supplier-invoices/reprocess/${firstFile.file_id}`, { + method: 'POST' + }); + const fileData = await fileResponse.json(); + selectedText = fileData.pdf_text || ''; + pdfText = selectedText; // Set pdfText for pattern testing + const pdfPreview = document.getElementById('pdfPreview'); + if (pdfPreview) { + pdfPreview.textContent = selectedText; + } + } + } + + console.log('Template loaded successfully'); + + } catch (error) { + console.error('Failed to load template:', error); + console.error('Error details:', error.message, error.stack); + alert(`Kunne ikke hente template: ${error.message}`); + window.location.href = '/billing/templates'; + } +} + +// Load selected file when user changes dropdown +async function loadSelectedFile() { + const fileSelect = document.getElementById('editFileSelect'); + if (!fileSelect || !fileSelect.value) { + console.log('No file selected'); + return; + } + + const fileId = fileSelect.value; + console.log(`Loading file ${fileId}...`); + + try { + const fileResponse = await fetch(`/api/v1/supplier-invoices/reprocess/${fileId}`, { + method: 'POST' + }); + + if (!fileResponse.ok) { + throw new Error(`HTTP ${fileResponse.status}: ${await fileResponse.text()}`); + } + + const fileData = await fileResponse.json(); + selectedText = fileData.pdf_text || ''; + pdfText = selectedText; // Set pdfText for pattern testing + + const pdfPreview = document.getElementById('pdfPreview'); + if (pdfPreview) { + pdfPreview.textContent = selectedText; + console.log(`Loaded ${selectedText.length} characters from file`); + } + } catch (error) { + console.error('Failed to load file:', error); + alert(`Kunne ikke hente fil: ${error.message}`); + } +} + +function renderDetectionPatterns() { + // Detection patterns are stored in array, show them in UI somehow + // For now, just log them - you might want to add a display area + console.log('Detection patterns:', detectionPatterns); +} + +function renderFieldPatterns() { + // Populate field pattern inputs - check if elements exist first + const invoiceNumberPattern = document.getElementById('invoiceNumberPattern'); + const datePattern = document.getElementById('datePattern'); + const totalPattern = document.getElementById('totalPattern'); + const cvrPattern = document.getElementById('cvrPattern'); + const linesStartPattern = document.getElementById('linesStartPattern'); + const linesEndPattern = document.getElementById('linesEndPattern'); + const lineItemPattern = document.getElementById('lineItemPattern'); + + if (fieldPatterns.invoice_number && invoiceNumberPattern) { + invoiceNumberPattern.value = fieldPatterns.invoice_number.pattern || ''; + } + if (fieldPatterns.invoice_date && datePattern) { + datePattern.value = fieldPatterns.invoice_date.pattern || ''; + } + if (fieldPatterns.total_amount && totalPattern) { + totalPattern.value = fieldPatterns.total_amount.pattern || ''; + } + if (fieldPatterns.vendor_cvr && cvrPattern) { + cvrPattern.value = fieldPatterns.vendor_cvr.pattern || ''; + } + if (fieldPatterns.lines_start && linesStartPattern) { + linesStartPattern.value = fieldPatterns.lines_start.pattern || ''; + } + if (fieldPatterns.lines_end && linesEndPattern) { + linesEndPattern.value = fieldPatterns.lines_end.pattern || ''; + } + if (fieldPatterns.line_item && lineItemPattern) { + lineItemPattern.value = fieldPatterns.line_item.pattern || ''; + } + + console.log('Field patterns populated'); +} + async function loadPendingFiles() { try { - const response = await fetch('/api/v1/supplier-invoices/pending-files'); + const response = await fetch('/api/v1/pending-supplier-invoice-files'); const data = await response.json(); const filesList = document.getElementById('filesList'); @@ -597,15 +827,21 @@ function setField(fieldName) { return; } + console.log('setField called:', { fieldName, selectedText }); + // Auto-generate regex pattern based on selected text const pattern = generatePattern(selectedText, fieldName); + console.log('Generated pattern:', pattern); + // Store pattern fieldPatterns[fieldName] = { value: selectedText, pattern: pattern }; + console.log('Stored in fieldPatterns:', fieldPatterns[fieldName]); + // Update UI if (fieldName === 'invoice_number') { document.getElementById('invoiceNumberValue').value = selectedText; @@ -682,56 +918,87 @@ function setLineField(lineFieldType) { } function generatePattern(text, fieldName) { - // Find context before the value in PDF - const index = pdfText.indexOf(text); - if (index === -1) return escapeRegex(text); + console.log('generatePattern called:', { text, fieldName }); + console.log('pdfText length:', pdfText.length); - // Get 30 chars before for better context - const before = pdfText.substring(Math.max(0, index - 30), index).trim(); - const words = before.split(/\s+/); - const lastWord = words[words.length - 1] || ''; - const secondLastWord = words[words.length - 2] || ''; + // Split selected text into words to find label and value + const words = text.trim().split(/\s+/); + console.log('Selected text words:', words); - // Generate pattern based on field type + let label = ''; + let value = ''; + + // For invoice_number, date, amount: first word is usually the label if (fieldName === 'invoice_number') { - // Number pattern - if (/^\d+$/.test(text)) { - return `${escapeRegex(lastWord)}\\s*(\\d+)`; + // Try to find number in selected text + const numberMatch = text.match(/(\d+)/); + console.log('Number match:', numberMatch); + if (numberMatch) { + value = numberMatch[1]; + // Find word before the number + const beforeNumber = text.substring(0, text.indexOf(value)).trim(); + console.log('Before number:', beforeNumber); + const labelWords = beforeNumber.split(/\s+/); + console.log('Label words:', labelWords); + label = labelWords[labelWords.length - 1] || 'Nummer'; + console.log('Using label:', label); + + const pattern = `${escapeRegex(label)}\\s+(\\d+)`; + console.log('Invoice number pattern:', pattern); + return pattern; + } else { + console.log('No number found in selected text!'); } } else if (fieldName === 'invoice_date') { - // Date pattern - very flexible - // Detect various date formats: DD/MM-YY, DD-MM-YYYY, DD.MM.YYYY, etc. - const datePatterns = [ - /(\d{1,2})[\/\-\.](\d{1,2})[\/\-\.](\d{2,4})/, // DD/MM/YY or DD-MM-YYYY - /(\d{1,2})[\/\-\.](\d{1,2})[\/\-\.](\d{2})/, // DD/MM-YY - /(\d{2,4})[\/\-\.](\d{1,2})[\/\-\.](\d{1,2})/ // YYYY-MM-DD - ]; - - for (let dp of datePatterns) { - if (dp.test(text)) { - // Use flexible pattern that matches any separator - return `${escapeRegex(lastWord)}\\s*(\\d{1,2}[\\/.\\-]\\d{1,2}[\\/.\\-]\\d{2,4})`; - } - } - - // If no pattern matches, try with two words context - if (secondLastWord) { - return `${escapeRegex(secondLastWord)}\\s+${escapeRegex(lastWord)}\\s*(.+)`; + // Find date in selected text + const dateMatch = text.match(/(\d{1,2}[\/\-\.]\d{1,2}[\/\-\.]\d{2,4})/); + if (dateMatch) { + value = dateMatch[1]; + const beforeDate = text.substring(0, text.indexOf(value)).trim(); + const labelWords = beforeDate.split(/\s+/); + label = labelWords[labelWords.length - 1] || 'Dato'; + + const pattern = `${escapeRegex(label)}\\s+(\\d{1,2}[\\/.\\-]\\d{1,2}[\\/.\\-]\\d{2,4})`; + console.log('Date pattern:', pattern); + return pattern; } } else if (fieldName === 'total_amount') { - // Amount pattern - handle Danish format (1.234,56 or 1234,56) - if (/[\d.,]+/.test(text)) { - return `${escapeRegex(lastWord)}\\s*([\\d.,]+)`; + // Find amount in selected text + const amountMatch = text.match(/([\d.,]+)\s*$/); + if (amountMatch) { + value = amountMatch[1]; + const beforeAmount = text.substring(0, text.indexOf(value)).trim(); + const labelWords = beforeAmount.split(/\s+/); + label = labelWords[labelWords.length - 1] || 'beløb'; + + const pattern = `${escapeRegex(label)}\\s+([\\d.,]+)`; + console.log('Amount pattern:', pattern); + return pattern; } } else if (fieldName === 'cvr') { - // CVR pattern - if (/\d{8}/.test(text)) { - return `${escapeRegex(lastWord)}\\s*(\\d{8})`; + // Find CVR number (8 digits, possibly with DK prefix) + const cvrMatch = text.match(/DK(\d{8})|(\d{8})/); + if (cvrMatch) { + const beforeCvr = text.substring(0, text.indexOf(cvrMatch[1] || cvrMatch[2])).trim(); + const labelWords = beforeCvr.split(/\s+/); + label = labelWords[labelWords.length - 1] || 'CVR'; + + const pattern = `${escapeRegex(label)}\\s+\\w*(\\d{8})`; + console.log('CVR pattern:', pattern); + return pattern; } } - // Fallback: exact match with context - return `${escapeRegex(lastWord)}\\s*(${escapeRegex(text)})`; + // Fallback: use first word as label + if (words.length >= 2) { + label = words[0]; + const pattern = `${escapeRegex(label)}\\s+(.+?)`; + console.log('Fallback pattern:', pattern); + return pattern; + } + + // Ultimate fallback + return escapeRegex(text); } function escapeRegex(str) { @@ -1023,6 +1290,10 @@ async function saveTemplate() { const vendorId = document.getElementById('vendorSelect').value; const templateName = document.getElementById('templateName').value; + console.log('Saving template...', { vendorId, templateName, editingTemplateId }); + console.log('Detection patterns:', detectionPatterns); + console.log('Field patterns:', fieldPatterns); + if (!vendorId || !templateName) { alert('Vælg leverandør og angiv template navn'); return; @@ -1034,11 +1305,14 @@ async function saveTemplate() { } // Build detection patterns from array - const detectionPatternsData = detectionPatterns.map(text => ({ - type: 'text', - pattern: text.trim(), - weight: 0.5 - })); + const detectionPatternsData = detectionPatterns.map(item => { + // Handle both string format (new) and object format (loaded from DB) + if (typeof item === 'string') { + return { type: 'text', pattern: item.trim(), weight: 0.5 }; + } else { + return { type: item.type || 'text', pattern: item.pattern, weight: item.weight || 0.5 }; + } + }); // Build field mappings from stored patterns const fieldMappings = {}; @@ -1091,8 +1365,15 @@ async function saveTemplate() { } try { - const response = await fetch('/api/v1/supplier-invoices/templates', { - method: 'POST', + const url = editingTemplateId + ? `/api/v1/supplier-invoices/templates/${editingTemplateId}` + : '/api/v1/supplier-invoices/templates'; + const method = editingTemplateId ? 'PUT' : 'POST'; + + console.log('Sending request:', { url, method, fieldMappings }); + + const response = await fetch(url, { + method: method, headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ vendor_id: parseInt(vendorId), @@ -1102,10 +1383,15 @@ async function saveTemplate() { }) }); + console.log('Response status:', response.status); + if (response.ok) { const data = await response.json(); - alert(`✅ Template gemt! ID: ${data.template_id}\n\nDu kan nu uploade fakturaer og systemet vil automatisk udtrække data.`); - window.location.href = '/billing/supplier-invoices'; + const message = editingTemplateId + ? `✅ Template opdateret!\n\nÆndringerne er gemt.` + : `✅ Template gemt! ID: ${data.template_id}\n\nDu kan nu uploade fakturaer og systemet vil automatisk udtrække data.`; + alert(message); + window.location.href = '/billing/templates'; } else { const error = await response.json(); alert(`❌ Fejl: ${error.detail}`); @@ -1126,45 +1412,61 @@ async function testTemplate() { } // Build detection patterns from array - const detectionPatternsData = detectionPatterns.map(text => ({ - type: 'text', - pattern: text.trim(), - weight: 0.5 - })); + const detectionPatternsData = detectionPatterns.map(item => { + // Handle both string format (new) and object format (loaded from DB) + if (typeof item === 'string') { + return { type: 'text', pattern: item.trim(), weight: 0.5 }; + } else { + return { type: item.type || 'text', pattern: item.pattern, weight: item.weight || 0.5 }; + } + }); - // Build field mappings from stored patterns - const fieldMappings = {}; + // Build field mappings - use existing fieldPatterns if loaded from DB + let fieldMappings = {}; - if (fieldPatterns.invoice_number) { - fieldMappings.invoice_number = { - pattern: fieldPatterns.invoice_number.pattern, - group: 1 - }; + console.log('fieldPatterns:', fieldPatterns); + console.log('Has invoice_number?', fieldPatterns.invoice_number); + console.log('Has pattern?', fieldPatterns.invoice_number?.pattern); + + // If fieldPatterns already has the right structure (loaded from DB), use it directly + if (fieldPatterns.invoice_number && fieldPatterns.invoice_number.pattern) { + console.log('Using fieldPatterns directly from DB'); + fieldMappings = { ...fieldPatterns }; + console.log('fieldMappings after copy:', fieldMappings); + } else { + console.log('Building fieldMappings from form'); + // Build from form fields (new template creation) + if (fieldPatterns.invoice_number) { + fieldMappings.invoice_number = { + pattern: fieldPatterns.invoice_number.pattern, + group: 1 + }; + } + + if (fieldPatterns.invoice_date) { + fieldMappings.invoice_date = { + pattern: fieldPatterns.invoice_date.pattern, + format: 'DD/MM-YY', + group: 1 + }; + } + + if (fieldPatterns.total_amount) { + fieldMappings.total_amount = { + pattern: fieldPatterns.total_amount.pattern, + group: 1 + }; + } + + if (fieldPatterns.vendor_cvr || fieldPatterns.cvr) { + fieldMappings.vendor_cvr = { + pattern: (fieldPatterns.vendor_cvr || fieldPatterns.cvr).pattern, + group: 1 + }; + } } - if (fieldPatterns.invoice_date) { - fieldMappings.invoice_date = { - pattern: fieldPatterns.invoice_date.pattern, - format: 'DD/MM-YY', - group: 1 - }; - } - - if (fieldPatterns.total_amount) { - fieldMappings.total_amount = { - pattern: fieldPatterns.total_amount.pattern, - group: 1 - }; - } - - if (fieldPatterns.cvr) { - fieldMappings.vendor_cvr = { - pattern: fieldPatterns.cvr.pattern, - group: 1 - }; - } - - // Add line extraction patterns if provided + // Add line extraction patterns from form inputs const linesStartPattern = document.getElementById('linesStartPattern').value; const linesEndPattern = document.getElementById('linesEndPattern').value; const lineItemPattern = document.getElementById('lineItemPattern').value; @@ -1213,9 +1515,15 @@ async function testTemplate() { for (let [fieldName, config] of Object.entries(fieldMappings)) { if (['lines_start', 'lines_end', 'line_item'].includes(fieldName)) continue; + console.log(`Testing field ${fieldName}:`, config); + try { const regex = new RegExp(config.pattern, 'i'); + console.log(`Regex for ${fieldName}:`, regex); + const match = pdfText.match(regex); + console.log(`Match result for ${fieldName}:`, match); + if (match && match[config.group]) { extractedHtml += `
  • ${fieldName}: "${match[config.group].trim()}"
  • `; extractedCount++; @@ -1228,12 +1536,145 @@ async function testTemplate() { } extractedHtml += ''; + // Test line item extraction + let lineItemsHtml = ''; + if (fieldMappings.lines_start && fieldMappings.lines_end && fieldMappings.line_item) { + lineItemsHtml = '
    Varelinjer:
    '; + + try { + const startMatch = pdfText.match(new RegExp(fieldMappings.lines_start.pattern, 'i')); + const endMatch = pdfText.match(new RegExp(fieldMappings.lines_end.pattern, 'i')); + + if (startMatch && endMatch) { + const startPos = pdfText.indexOf(startMatch[0]) + startMatch[0].length; + const endPos = pdfText.indexOf(endMatch[0]); + const lineSection = pdfText.substring(startPos, endPos); + + // Check if we have separate item and price patterns (ALSO style) + if (fieldMappings.line_price) { + // Two-pattern extraction: item info + price info + const itemRegex = new RegExp(fieldMappings.line_item.pattern, 'gim'); + const priceRegex = new RegExp(fieldMappings.line_price.pattern, 'gim'); + + const itemMatches = [...lineSection.matchAll(itemRegex)]; + const priceMatches = [...lineSection.matchAll(priceRegex)]; + + console.log('Item matches:', itemMatches.length, 'Price matches:', priceMatches.length); + + if (itemMatches.length > 0 && priceMatches.length > 0) { + lineItemsHtml += `

    ✅ Fandt ${Math.min(itemMatches.length, priceMatches.length)} varelinjer:

    `; + lineItemsHtml += '
    '; + lineItemsHtml += ''; + lineItemsHtml += ''; + + // Combine item and price matches + const maxLines = Math.min(5, itemMatches.length, priceMatches.length); + for (let i = 0; i < maxLines; i++) { + const item = itemMatches[i]; + const price = priceMatches[i]; + + // Check for VAT markers between this price and next item + const priceEndPos = price.index + price[0].length; + let nextItemStartPos = lineSection.length; + + // Find start of next item (if exists) + if (i + 1 < itemMatches.length) { + nextItemStartPos = itemMatches[i + 1].index; + } + + // Check section between price and next item + const betweenSection = lineSection.substring(priceEndPos, nextItemStartPos); + + console.log(`Item ${i} (pos ${item[1]}):`, { + priceEndPos, + nextItemStartPos, + betweenLength: betweenSection.length, + betweenPreview: betweenSection.substring(0, 100) + }); + + const hasReverseCharge = /omvendt.*betalingspligt/i.test(betweenSection); + const hasCopydan = /copydan/i.test(betweenSection); + + console.log(` VAT checks: Omvendt=${hasReverseCharge}, Copydan=${hasCopydan}`); + + let vatMarker = ''; + if (hasReverseCharge && hasCopydan) { + vatMarker = 'OmvendtCopydan'; + } else if (hasReverseCharge) { + vatMarker = 'Omvendt'; + } else if (hasCopydan) { + vatMarker = 'Copydan'; + } + + lineItemsHtml += ''; + lineItemsHtml += ``; // position + lineItemsHtml += ``; // item_number + lineItemsHtml += ``; // description (truncated) + lineItemsHtml += ``; // quantity + lineItemsHtml += ``; // unit_price + lineItemsHtml += ``; // total_price + lineItemsHtml += ``; // vat marker + lineItemsHtml += ''; + } + + lineItemsHtml += '
    PositionItemDescriptionQtyPriceTotalVAT
    ${item[1]}${item[2]}${item[3] ? item[3].trim().substring(0, 40) : ''}${price[1]}${price[2]}${price[3]}${vatMarker}
    '; + const totalLines = Math.min(itemMatches.length, priceMatches.length); + if (totalLines > 5) { + lineItemsHtml += `

    ... og ${totalLines - 5} linjer mere

    `; + } + } else { + lineItemsHtml += `

    ❌ Fandt ${itemMatches.length} item-linjer og ${priceMatches.length} pris-linjer

    `; + } + } else { + // Single-pattern extraction (old style) + const lineRegex = new RegExp(fieldMappings.line_item.pattern, 'gim'); + const lines = [...lineSection.matchAll(lineRegex)]; + + if (lines.length > 0) { + lineItemsHtml += `

    ✅ Fandt ${lines.length} varelinjer:

    `; + lineItemsHtml += '
    '; + + const fields = fieldMappings.line_item.fields || ['position', 'item_number', 'description', 'quantity', 'unit_price', 'total_price']; + fields.forEach(f => { + lineItemsHtml += ``; + }); + lineItemsHtml += ''; + + // Show first 5 lines + lines.slice(0, 5).forEach(match => { + lineItemsHtml += ''; + for (let i = 1; i <= fields.length; i++) { + lineItemsHtml += ``; + } + lineItemsHtml += ''; + }); + + lineItemsHtml += '
    ${f}
    ${match[i] ? match[i].trim() : ''}
    '; + if (lines.length > 5) { + lineItemsHtml += `

    ... og ${lines.length - 5} linjer mere

    `; + } + } else { + lineItemsHtml += '

    ❌ Ingen linjer fundet med pattern

    '; + } + } + } else { + lineItemsHtml += `

    ⚠️ Start eller slut marker ikke fundet

    `; + if (!startMatch) lineItemsHtml += `Start pattern: "${fieldMappings.lines_start.pattern}" ikke fundet
    `; + if (!endMatch) lineItemsHtml += `Slut pattern: "${fieldMappings.lines_end.pattern}" ikke fundet`; + } + } catch (e) { + lineItemsHtml += `

    ❌ Fejl: ${e.message}

    `; + console.error('Line extraction error:', e); + } + } + // Show results testResults.innerHTML = `
    ${matched ? '✅' : '❌'} Template ${matched ? 'MATCHER' : 'MATCHER IKKE'}

    Confidence: ${(confidence * 100).toFixed(0)}% (threshold: 70%)

    ${detectionHtml} ${extractedHtml} + ${lineItemsHtml} `; if (matched && extractedCount > 0) { diff --git a/app/billing/frontend/templates_list.html b/app/billing/frontend/templates_list.html index 6fb2b0e..14b0e43 100644 --- a/app/billing/frontend/templates_list.html +++ b/app/billing/frontend/templates_list.html @@ -163,6 +163,9 @@ async function loadTemplates() {

    + @@ -181,28 +184,51 @@ async function loadTemplates() { } } -async function loadPendingFiles() { +async function loadPendingFiles(vendorId = null) { try { - const response = await fetch('/api/v1/supplier-invoices/pending-files'); + const response = await fetch('/api/v1/pending-supplier-invoice-files'); const data = await response.json(); const select = document.getElementById('testFileSelect'); select.innerHTML = ''; - data.files.forEach(file => { + // Filter by vendor if provided + let files = data.files; + if (vendorId) { + files = files.filter(f => f.vendor_matched_id == vendorId); + } + + files.forEach(file => { select.innerHTML += ``; }); + + // Show message if no files for this vendor + if (vendorId && files.length === 0) { + select.innerHTML += ''; + } } catch (error) { console.error('Failed to load files:', error); } } -function openTestModal(templateId, templateName) { +async function openTestModal(templateId, templateName) { currentTemplateId = templateId; document.getElementById('modalTemplateName').textContent = templateName; document.getElementById('testResultsContainer').classList.add('d-none'); document.getElementById('testFileSelect').value = ''; + // Load template to get vendor_id + try { + const response = await fetch(`/api/v1/supplier-invoices/templates/${templateId}`); + const template = await response.json(); + + // Reload files filtered by this template's vendor + await loadPendingFiles(template.vendor_id); + } catch (error) { + console.error('Failed to load template:', error); + await loadPendingFiles(); // Fallback to all files + } + const modal = new bootstrap.Modal(document.getElementById('testModal')); modal.show(); } @@ -357,6 +383,11 @@ async function deleteTemplate(templateId) { alert('❌ Kunne ikke slette template'); } } + +function editTemplate(templateId) { + // Redirect to template builder with template ID + window.location.href = `/billing/template-builder?id=${templateId}`; +} diff --git a/app/core/config.py b/app/core/config.py index 9574d5a..074e65f 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -35,7 +35,7 @@ class Settings(BaseSettings): # Ollama AI Integration OLLAMA_ENDPOINT: str = "http://ai_direct.cs.blaahund.dk" - OLLAMA_MODEL: str = "qwen2.5:3b" # Hurtigere model til JSON extraction + OLLAMA_MODEL: str = "qwen2.5-coder:7b" # qwen2.5-coder fungerer bedre til JSON udtrækning # Company Info OWN_CVR: str = "29522790" # BMC Denmark ApS - ignore when detecting vendors diff --git a/app/services/ollama_service.py b/app/services/ollama_service.py index 598d60c..ef6abff 100644 --- a/app/services/ollama_service.py +++ b/app/services/ollama_service.py @@ -146,26 +146,85 @@ Output: { try: import httpx + # Detect if using qwen3 model (requires Chat API) + use_chat_api = self.model.startswith('qwen3') + async with httpx.AsyncClient(timeout=1000.0) as client: - response = await client.post( - f"{self.endpoint}/api/generate", - json={ - "model": self.model, - "prompt": prompt, - "stream": False, - "options": { - "temperature": 0.1, - "top_p": 0.9, - "num_predict": 2000 + if use_chat_api: + # qwen3 models use Chat API format + logger.info(f"🤖 Using Chat API for {self.model}") + response = await client.post( + f"{self.endpoint}/api/chat", + json={ + "model": self.model, + "messages": [ + { + "role": "system", + "content": self.system_prompt + }, + { + "role": "user", + "content": f"NU SKAL DU UDTRÆKKE DATA FRA DENNE FAKTURA:\n{text}\n\nVIGTIGT: Dit svar skal STARTE med {{ og SLUTTE med }} - ingen forklaring før eller efter JSON!" + } + ], + "stream": False, + "format": "json", + "options": { + "temperature": 0.1, + "top_p": 0.9, + "num_predict": 2000 + } } - } - ) + ) + else: + # qwen2.5 and other models use Generate API format + logger.info(f"🤖 Using Generate API for {self.model}") + response = await client.post( + f"{self.endpoint}/api/generate", + json={ + "model": self.model, + "prompt": prompt, + "stream": False, + "options": { + "temperature": 0.1, + "top_p": 0.9, + "num_predict": 2000 + } + } + ) if response.status_code != 200: raise Exception(f"Ollama returned status {response.status_code}: {response.text}") result = response.json() - raw_response = result.get("response", "") + + # Extract response based on API type + if use_chat_api: + # qwen3 models sometimes put the actual response in "thinking" field + raw_response = result.get("message", {}).get("content", "") + thinking = result.get("message", {}).get("thinking", "") + + # If content is empty but thinking has data, try to extract JSON from thinking + if not raw_response and thinking: + logger.info(f"💭 Content empty, attempting to extract JSON from thinking field (length: {len(thinking)})") + # Try to find JSON block in thinking text + json_start = thinking.find('{') + json_end = thinking.rfind('}') + 1 + if json_start >= 0 and json_end > json_start: + potential_json = thinking[json_start:json_end] + logger.info(f"📦 Found potential JSON in thinking field (length: {len(potential_json)})") + raw_response = potential_json + else: + logger.warning(f"⚠️ No JSON found in thinking field, using full thinking as fallback") + raw_response = thinking + elif thinking: + logger.info(f"💭 Model thinking (length: {len(thinking)})") + + # DEBUG: Log full result structure + logger.info(f"📊 Chat API result keys: {list(result.keys())}") + logger.info(f"📊 Message keys: {list(result.get('message', {}).keys())}") + else: + raw_response = result.get("response", "") logger.info(f"✅ Ollama extraction completed (response length: {len(raw_response)})") @@ -243,12 +302,16 @@ Output: { def _parse_json_response(self, response: str) -> Dict: """Parse JSON from LLM response with improved error handling""" try: + # Log preview of response for debugging + logger.info(f"🔍 Response preview (first 500 chars): {response[:500]}") + # Find JSON in response (between first { and last }) start = response.find('{') end = response.rfind('}') + 1 if start >= 0 and end > start: json_str = response[start:end] + logger.info(f"🔍 Extracted JSON string length: {len(json_str)}, starts at position {start}") # Try to fix common JSON issues # Remove trailing commas before } or ] diff --git a/app/services/template_service.py b/app/services/template_service.py index 69983e8..9e99b06 100644 --- a/app/services/template_service.py +++ b/app/services/template_service.py @@ -20,13 +20,20 @@ class TemplateService: def __init__(self): self.templates_cache = {} - self._load_templates() + self._initialized = False + + def _ensure_loaded(self): + """Lazy load templates on first use""" + if not self._initialized: + logger.info("🔄 Lazy loading templates...") + self._load_templates() + self._initialized = True def _load_templates(self): """Load all active templates into cache""" try: templates = execute_query( - """SELECT t.*, v.name as vendor_name, v.cvr as vendor_cvr + """SELECT t.*, v.name as vendor_name, v.cvr_number as vendor_cvr FROM supplier_invoice_templates t LEFT JOIN vendors v ON t.vendor_id = v.id WHERE t.is_active = TRUE""" @@ -46,12 +53,17 @@ class TemplateService: Find best matching template for PDF text Returns: (template_id, confidence_score) """ + self._ensure_loaded() # Lazy load templates + + logger.info(f"🔍 Matching against {len(self.templates_cache)} templates") + best_match = None best_score = 0.0 pdf_text_lower = pdf_text.lower() for template_id, template in self.templates_cache.items(): score = self._calculate_match_score(pdf_text_lower, template) + logger.debug(f" Template {template_id} ({template['template_name']}): {score:.2f}") if score > best_score: best_score = score @@ -59,6 +71,8 @@ class TemplateService: if best_match: logger.info(f"✅ Matched template {best_match} ({self.templates_cache[best_match]['template_name']}) with {best_score:.0%} confidence") + else: + logger.info(f"⚠️ No template matched (best score: {best_score:.2f})") return best_match, best_score @@ -96,6 +110,8 @@ class TemplateService: def extract_fields(self, pdf_text: str, template_id: int) -> Dict: """Extract invoice fields using template's regex patterns""" + self._ensure_loaded() # Lazy load templates + template = self.templates_cache.get(template_id) if not template: logger.warning(f"⚠️ Template {template_id} not found in cache") @@ -124,6 +140,8 @@ class TemplateService: def extract_line_items(self, pdf_text: str, template_id: int) -> List[Dict]: """Extract invoice line items using template's line patterns""" + self._ensure_loaded() # Lazy load templates + template = self.templates_cache.get(template_id) if not template: logger.warning(f"⚠️ Template {template_id} not found in cache") @@ -227,6 +245,7 @@ class TemplateService: quantity = None unit_price = None total_price = None + vat_note = None # For "Omvendt betalingspligt" etc. for j in range(i+1, min(i+10, len(lines_arr))): price_line = lines_arr[j].strip() @@ -236,11 +255,20 @@ class TemplateService: quantity = price_match.group(1) unit_price = price_match.group(2).replace(',', '.') total_price = price_match.group(3).replace(',', '.') + + # Check next 3 lines for VAT markers + for k in range(j+1, min(j+4, len(lines_arr))): + vat_line = lines_arr[k].strip().lower() + if 'omvendt' in vat_line and 'betalingspligt' in vat_line: + vat_note = "reverse_charge" + logger.debug(f"⚠️ Found reverse charge marker for item {item_number}") + elif 'copydan' in vat_line: + vat_note = "copydan_included" break # Kun tilføj hvis vi fandt priser if quantity and unit_price: - items.append({ + item_data = { 'line_number': len(items) + 1, 'position': position, 'item_number': item_number, @@ -249,8 +277,14 @@ class TemplateService: 'unit_price': unit_price, 'total_price': total_price, 'raw_text': f"{line} ... {quantity}ST {unit_price} {total_price}" - }) - logger.info(f"✅ Multi-line item: {item_number} - {description[:30]}... ({quantity}ST @ {unit_price})") + } + + # Add VAT note if found + if vat_note: + item_data['vat_note'] = vat_note + + items.append(item_data) + logger.info(f"✅ Multi-line item: {item_number} - {description[:30]}... ({quantity}ST @ {unit_price}){' [REVERSE CHARGE]' if vat_note == 'reverse_charge' else ''}") i += 1 @@ -264,12 +298,13 @@ class TemplateService: def log_usage(self, template_id: int, file_id: int, matched: bool, confidence: float, fields: Dict): """Log template usage for statistics""" + import json try: execute_insert( """INSERT INTO template_usage_log (template_id, file_id, matched, confidence, fields_extracted) VALUES (%s, %s, %s, %s, %s)""", - (template_id, file_id, matched, confidence, fields) + (template_id, file_id, matched, confidence, json.dumps(fields)) ) if matched: @@ -298,7 +333,8 @@ class TemplateService: def reload_templates(self): """Reload templates from database""" self.templates_cache = {} - self._load_templates() + self._initialized = False + self._ensure_loaded() # Global instance diff --git a/docker-compose.yml b/docker-compose.yml index fd4cb10..1340065 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -51,6 +51,8 @@ services: # Override database URL to point to postgres service - DATABASE_URL=postgresql://${POSTGRES_USER:-bmc_hub}:${POSTGRES_PASSWORD:-bmc_hub}@postgres:5432/${POSTGRES_DB:-bmc_hub} - ENABLE_RELOAD=false + - OLLAMA_MODEL=qwen3:4b # Bruger Chat API format + - OLLAMA_MODEL_FALLBACK=qwen2.5:3b # Backup model restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] diff --git a/docker-compose.yml.bak2 b/docker-compose.yml.bak2 new file mode 100644 index 0000000..b9138ff --- /dev/null +++ b/docker-compose.yml.bak2 @@ -0,0 +1,71 @@ +version: '3.8' + +services: + # PostgreSQL Database + postgres: + image: postgres:16-alpine + container_name: bmc-hub-postgres + environment: + POSTGRES_USER: ${POSTGRES_USER:-bmc_hub} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-bmc_hub} + POSTGRES_DB: ${POSTGRES_DB:-bmc_hub} + volumes: + - postgres_data:/var/lib/postgresql/data + - ./migrations/init.sql:/docker-entrypoint-initdb.d/init.sql:ro + ports: + - "${POSTGRES_PORT:-5433}:5432" + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-bmc_hub}"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - bmc-hub-network + + # FastAPI Application + api: + build: + context: . + dockerfile: Dockerfile + container_name: bmc-hub-api + depends_on: + postgres: + condition: service_healthy + ports: + - "${API_PORT:-8001}:8000" + volumes: + - ./logs:/app/logs + - ./uploads:/app/uploads + - ./static:/app/static + - ./data:/app/data + # Mount for local development - live code reload + - ./app:/app/app:ro + - ./main.py:/app/main.py:ro + - ./scripts:/app/scripts:ro + # Mount OmniSync database for import (read-only) + - /Users/christianthomas/pakkemodtagelse/data:/omnisync_data:ro + env_file: + - .env + environment: + # Override database URL to point to postgres service + - DATABASE_URL=postgresql://${POSTGRES_USER:-bmc_hub}:${POSTGRES_PASSWORD:-bmc_hub}@postgres:5432/${POSTGRES_DB:-bmc_hub} + - ENABLE_RELOAD=false + - OLLAMA_MODEL=qwen3:4b + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - bmc-hub-network + +networks: + bmc-hub-network: + driver: bridge + +volumes: + postgres_data: + driver: local diff --git a/docker-compose.yml.bak3 b/docker-compose.yml.bak3 new file mode 100644 index 0000000..60dac20 --- /dev/null +++ b/docker-compose.yml.bak3 @@ -0,0 +1,71 @@ +version: '3.8' + +services: + # PostgreSQL Database + postgres: + image: postgres:16-alpine + container_name: bmc-hub-postgres + environment: + POSTGRES_USER: ${POSTGRES_USER:-bmc_hub} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-bmc_hub} + POSTGRES_DB: ${POSTGRES_DB:-bmc_hub} + volumes: + - postgres_data:/var/lib/postgresql/data + - ./migrations/init.sql:/docker-entrypoint-initdb.d/init.sql:ro + ports: + - "${POSTGRES_PORT:-5433}:5432" + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-bmc_hub}"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - bmc-hub-network + + # FastAPI Application + api: + build: + context: . + dockerfile: Dockerfile + container_name: bmc-hub-api + depends_on: + postgres: + condition: service_healthy + ports: + - "${API_PORT:-8001}:8000" + volumes: + - ./logs:/app/logs + - ./uploads:/app/uploads + - ./static:/app/static + - ./data:/app/data + # Mount for local development - live code reload + - ./app:/app/app:ro + - ./main.py:/app/main.py:ro + - ./scripts:/app/scripts:ro + # Mount OmniSync database for import (read-only) + - /Users/christianthomas/pakkemodtagelse/data:/omnisync_data:ro + env_file: + - .env + environment: + # Override database URL to point to postgres service + - DATABASE_URL=postgresql://${POSTGRES_USER:-bmc_hub}:${POSTGRES_PASSWORD:-bmc_hub}@postgres:5432/${POSTGRES_DB:-bmc_hub} + - ENABLE_RELOAD=false + - OLLAMA_MODEL=qwen2.5:3b + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - bmc-hub-network + +networks: + bmc-hub-network: + driver: bridge + +volumes: + postgres_data: + driver: local