feat: Implement quick analysis on PDF upload for CVR, document type, and number extraction

- Added `check_invoice_number_exists` method in `EconomicService` to verify invoice numbers in e-conomic journals.
- Introduced `quick_analysis_on_upload` method in `OllamaService` for extracting critical fields from uploaded PDFs, including CVR, document type, and document number.
- Created migration script to add new fields for storing detected CVR, vendor ID, document type, and document number in the `incoming_files` table.
- Developed comprehensive tests for the quick analysis functionality, validating CVR detection, document type identification, and invoice number extraction.
2025-12-09 14:54:33 +01:00 · 2025-12-09 14:54:33 +01:00 · 3a8288f5a1
commit 3a8288f5a1
parent 890bd6245d
16 changed files with 2731 additions and 205 deletions
--- a/app/billing/backend/supplier_invoices.py
+++ b/app/billing/backend/supplier_invoices.py
@ -13,6 +13,7 @@ from app.core.config import settings
 from app.services.economic_service import get_economic_service
 from app.services.ollama_service import ollama_service
 from app.services.template_service import template_service
 from app.services.invoice2data_service import get_invoice2data_service
 import logging
 import os
 import re
@ -232,15 +233,25 @@ async def get_pending_files():
                f.error_message, 
                f.template_id,
                f.file_path,
                -- Quick analysis results (available immediately on upload)
                f.detected_cvr,
                f.detected_vendor_id,
                f.detected_document_type,
                f.detected_document_number,
                f.is_own_invoice,
                v_detected.name as detected_vendor_name,
                v_detected.cvr_number as detected_vendor_cvr,
                -- Get vendor info from latest extraction
                ext.vendor_name,
                ext.vendor_cvr,
                ext.vendor_matched_id,
                v.name as matched_vendor_name,
                v.cvr_number as matched_vendor_cvr_number,
                -- Check if already has invoice via latest extraction only
                si.id as existing_invoice_id,
                si.invoice_number as existing_invoice_number
               FROM incoming_files f
               LEFT JOIN vendors v_detected ON v_detected.id = f.detected_vendor_id
               LEFT JOIN LATERAL (
                   SELECT extraction_id, file_id, vendor_name, vendor_cvr, vendor_matched_id
                   FROM extractions 
@ -250,16 +261,82 @@ async def get_pending_files():
               ) ext ON true
               LEFT JOIN vendors v ON v.id = ext.vendor_matched_id
               LEFT JOIN supplier_invoices si ON si.extraction_id = ext.extraction_id
-               WHERE f.status IN ('pending', 'processing', 'failed', 'ai_extracted', 'processed')
+               WHERE f.status IN ('pending', 'processing', 'failed', 'ai_extracted', 'processed', 'duplicate')
                 AND si.id IS NULL  -- Only show files without invoice yet
               ORDER BY f.file_id, f.uploaded_at DESC"""
        )
        # Convert to regular dicts so we can add new keys
        files = [dict(file) for file in files] if files else []
        # Check for invoice2data templates for each file
        try:
            from app.services.invoice2data_service import get_invoice2data_service
            invoice2data = get_invoice2data_service()
            logger.info(f"📋 Checking invoice2data templates: {len(invoice2data.templates)} loaded")
            for file in files:
                # Check if there's an invoice2data template for this vendor's CVR
                vendor_cvr = file.get('matched_vendor_cvr_number') or file.get('detected_vendor_cvr') or file.get('vendor_cvr')
                file['has_invoice2data_template'] = False
                logger.debug(f"  File {file['file_id']}: CVR={vendor_cvr}")
                if vendor_cvr:
                    # Check all templates for this CVR in keywords
                    for template_name, template_data in invoice2data.templates.items():
                        keywords = template_data.get('keywords', [])
                        logger.debug(f"    Template {template_name}: keywords={keywords}")
                        if str(vendor_cvr) in [str(k) for k in keywords]:
                            file['has_invoice2data_template'] = True
                            file['invoice2data_template_name'] = template_name
                            logger.info(f"  ✅ File {file['file_id']} matched template: {template_name}")
                            break
        except Exception as e:
            logger.error(f"❌ Failed to check invoice2data templates: {e}", exc_info=True)
            # Continue without invoice2data info
        return {"files": files if files else [], "count": len(files) if files else 0}
    except Exception as e:
        logger.error(f"❌ Failed to get pending files: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/files/{file_id}/pdf-text")
async def get_file_pdf_text(file_id: int):
    """Return the full extracted text of an uploaded file (for the template builder).

    Args:
        file_id: Value of ``incoming_files.file_id`` identifying the upload.

    Returns:
        A dict with the keys ``file_id``, ``filename`` and ``pdf_text``.

    Raises:
        HTTPException: 404 when no ``incoming_files`` row matches or the
            stored path does not exist on disk; 500 for any other failure.
    """
    from pathlib import Path

    try:
        # Look up where the uploaded file was stored.
        file_info = execute_query(
            "SELECT file_path, filename FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        file_path = Path(file_info['file_path'])
        if not file_path.exists():
            raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}")

        # Text extraction is delegated to the Ollama service helper.
        pdf_text = await ollama_service._extract_text_from_file(file_path)

        return {
            "file_id": file_id,
            "filename": file_info['filename'],
            "pdf_text": pdf_text
        }
    except HTTPException:
        # Deliberate 4xx responses raised above must pass through unchanged.
        raise
    except Exception as e:
        # Log the traceback (as the other handlers in this module do) and
        # keep the original error attached as the cause of the 500.
        logger.error(f"❌ Failed to get PDF text: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/supplier-invoices/files/{file_id}/extracted-data")
 async def get_file_extracted_data(file_id: int):
    """Hent AI-extracted data fra en uploaded fil"""
@ -758,8 +835,9 @@ async def create_invoice_from_extraction(file_id: int):
@router.get("/supplier-invoices/templates")
 async def list_templates():
-    """Hent alle templates"""
+    """Hent alle templates (både database og invoice2data YAML)"""
    try:
        # Get database templates
        query = """
            SELECT t.*, v.name as vendor_name
            FROM supplier_invoice_templates t
@ -767,9 +845,55 @@ async def list_templates():
            WHERE t.is_active = true
            ORDER BY t.created_at DESC
        """
-        templates = execute_query(query)
+        db_templates = execute_query(query) or []
-        return templates if templates else []
+        # Get invoice2data templates
        invoice2data_service = get_invoice2data_service()
        invoice2data_templates = []
        for template_name, template_data in invoice2data_service.templates.items():
            # Extract vendor CVR from keywords
            vendor_cvr = None
            keywords = template_data.get('keywords', [])
            for keyword in keywords:
                if isinstance(keyword, str) and keyword.isdigit() and len(keyword) == 8:
                    vendor_cvr = keyword
                    break
            # Get vendor info from database if CVR found
            vendor_name = template_data.get('issuer', 'Ukendt')
            vendor_id = None
            if vendor_cvr:
                vendor = execute_query(
                    "SELECT id, name FROM vendors WHERE cvr_number = %s",
                    (vendor_cvr,),
                    fetchone=True
                )
                if vendor:
                    vendor_id = vendor['id']
                    vendor_name = vendor['name']
            invoice2data_templates.append({
                'template_id': -1,  # Negative ID to distinguish from DB templates
                'template_name': f"Invoice2Data: {template_name}",
                'template_type': 'invoice2data',
                'yaml_filename': template_name,
                'vendor_id': vendor_id,
                'vendor_name': vendor_name,
                'vendor_cvr': vendor_cvr,
                'default_product_category': template_data.get('default_product_category', 'varesalg'),
                'default_product_group_number': template_data.get('default_product_group_number', 1),
                'usage_count': 0,  # Could track this separately
                'is_active': True,
                'detection_patterns': keywords,
                'field_mappings': template_data.get('fields', {}),
                'created_at': None
            })
        # Combine both types
        all_templates = db_templates + invoice2data_templates
        return all_templates
    except Exception as e:
        logger.error(f"❌ Failed to list templates: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@ -978,6 +1102,7 @@ async def create_template(request: Dict):
        template_name = request.get('template_name')
        detection_patterns = request.get('detection_patterns', [])
        field_mappings = request.get('field_mappings', {})
        default_product_category = request.get('default_product_category', 'varesalg')
        if not vendor_id or not template_name:
            raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet")
@ -996,11 +1121,11 @@ async def create_template(request: Dict):
        # Insert template and get template_id
        query = """
            INSERT INTO supplier_invoice_templates 
-            (vendor_id, template_name, detection_patterns, field_mappings)
+            (vendor_id, template_name, detection_patterns, field_mappings, default_product_category)
-            VALUES (%s, %s, %s, %s)
+            VALUES (%s, %s, %s, %s, %s)
            RETURNING template_id
        """
-        result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings)))
+        result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings), default_product_category))
        template_id = result[0]['template_id'] if result else None
        if not template_id:
@ -1657,6 +1782,97 @@ async def upload_supplier_invoice(file: UploadFile = File(...)):
            logger.info(f"📄 Extracting text from {final_path.suffix}...")
            text = await ollama_service._extract_text_from_file(final_path)
            # QUICK ANALYSIS: Extract CVR, document type, invoice number IMMEDIATELY
            logger.info(f"⚡ Running quick analysis...")
            quick_result = await ollama_service.quick_analysis_on_upload(text)
            # Update file record with quick analysis results
            execute_update(
                """UPDATE incoming_files 
                   SET detected_cvr = %s, 
                       detected_vendor_id = %s,
                       detected_document_type = %s,
                       detected_document_number = %s,
                       is_own_invoice = %s
                   WHERE file_id = %s""",
                (quick_result.get('cvr'),
                 quick_result.get('vendor_id'),
                 quick_result.get('document_type'),
                 quick_result.get('document_number'),
                 quick_result.get('is_own_invoice', False),
                 file_id)
            )
            logger.info(f"📋 Quick analysis saved: CVR={quick_result.get('cvr')}, "
                       f"Vendor={quick_result.get('vendor_name')}, "
                       f"Type={quick_result.get('document_type')}, "
                       f"Number={quick_result.get('document_number')}")
            # DUPLICATE CHECK: Check if invoice number already exists
            document_number = quick_result.get('document_number')
            if document_number:
                logger.info(f"🔍 Checking for duplicate invoice number: {document_number}")
                # Check 1: Search in local database (supplier_invoices table)
                existing_invoice = execute_query(
                    """SELECT si.id, si.invoice_number, si.created_at, v.name as vendor_name
                       FROM supplier_invoices si
                       LEFT JOIN vendors v ON v.id = si.vendor_id
                       WHERE si.invoice_number = %s
                       ORDER BY si.created_at DESC
                       LIMIT 1""",
                    (document_number,),
                    fetchone=True
                )
                if existing_invoice:
                    # DUPLICATE FOUND IN DATABASE
                    logger.error(f"🚫 DUPLICATE: Invoice {document_number} already exists in database (ID: {existing_invoice['id']})")
                    # Mark file as duplicate
                    execute_update(
                        """UPDATE incoming_files 
                           SET status = 'duplicate', 
                               error_message = %s,
                               processed_at = CURRENT_TIMESTAMP
                           WHERE file_id = %s""",
                        (f"DUBLET: Fakturanummer {document_number} findes allerede i systemet (Faktura #{existing_invoice['id']}, {existing_invoice['vendor_name'] or 'Ukendt leverandør'})",
                         file_id)
                    )
                    raise HTTPException(
                        status_code=409,  # 409 Conflict
                        detail=f"🚫 DUBLET: Fakturanummer {document_number} findes allerede i systemet (Faktura #{existing_invoice['id']}, oprettet {existing_invoice['created_at'].strftime('%d-%m-%Y')})"
                    )
                # Check 2: Search in e-conomic (if configured)
                from app.services.economic_service import economic_service
                if hasattr(economic_service, 'app_secret_token') and economic_service.app_secret_token:
                    logger.info(f"🔍 Checking e-conomic for invoice number: {document_number}")
                    economic_duplicate = await economic_service.check_invoice_number_exists(document_number)
                    if economic_duplicate:
                        # DUPLICATE FOUND IN E-CONOMIC
                        logger.error(f"🚫 DUPLICATE: Invoice {document_number} found in e-conomic (Voucher #{economic_duplicate.get('voucher_number')})")
                        # Mark file as duplicate
                        execute_update(
                            """UPDATE incoming_files 
                               SET status = 'duplicate', 
                                   error_message = %s,
                                   processed_at = CURRENT_TIMESTAMP
                               WHERE file_id = %s""",
                            (f"DUBLET: Fakturanummer {document_number} findes i e-conomic (Bilag #{economic_duplicate.get('voucher_number')})",
                             file_id)
                        )
                        raise HTTPException(
                            status_code=409,  # 409 Conflict
                            detail=f"🚫 DUBLET: Fakturanummer {document_number} findes i e-conomic (Bilag #{economic_duplicate.get('voucher_number')}, {economic_duplicate.get('date')})"
                        )
                logger.info(f"✅ No duplicate found for invoice {document_number}")
            # Try template matching
            logger.info(f"📋 Matching template...")
            template_id, confidence = template_service.match_template(text)
@ -1699,7 +1915,8 @@ async def upload_supplier_invoice(file: UploadFile = File(...)):
                            """INSERT INTO extraction_lines
                               (extraction_id, line_number, description, quantity, unit_price, 
                                line_total, vat_rate, vat_note, confidence)
-                               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)""",
+                               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                               RETURNING line_id""",
                            (extraction_id, idx, line.get('description'), 
                             line.get('quantity'), line.get('unit_price'),
                             line.get('line_total'), line.get('vat_rate'), 
@ -1744,13 +1961,41 @@ async def upload_supplier_invoice(file: UploadFile = File(...)):
                "confidence": confidence,
                "extracted_fields": extracted_fields,
                "pdf_text": text[:500],  # First 500 chars for reference
                # Quick analysis results (available IMMEDIATELY on upload)
                "quick_analysis": {
                    "cvr": quick_result.get('cvr'),
                    "vendor_id": quick_result.get('vendor_id'),
                    "vendor_name": quick_result.get('vendor_name'),
                    "document_type": quick_result.get('document_type'),
                    "document_number": quick_result.get('document_number')
                },
                "message": "Upload gennemført - gennemgå og bekræft data"
            }
-        except HTTPException:
+        except HTTPException as he:
            # Mark file as failed if we have file_id
            if 'file_id' in locals():
                execute_update(
                    """UPDATE incoming_files 
                       SET status = 'failed', 
                           error_message = %s,
                           processed_at = CURRENT_TIMESTAMP
                       WHERE file_id = %s""",
                    (str(he.detail), file_id)
                )
            raise
        except Exception as e:
            logger.error(f"❌ Upload failed (inner): {e}", exc_info=True)
            # Mark file as failed if we have file_id
            if 'file_id' in locals():
                execute_update(
                    """UPDATE incoming_files 
                       SET status = 'failed', 
                           error_message = %s,
                           processed_at = CURRENT_TIMESTAMP
                       WHERE file_id = %s""",
                    (str(e), file_id)
                )
            raise HTTPException(status_code=500, detail=f"Upload fejlede: {str(e)}")
    except HTTPException:
@ -1809,51 +2054,174 @@ async def reprocess_uploaded_file(file_id: int):
            logger.info(f"✅ Matched template {template_id} ({confidence:.0%})")
            extracted_fields = template_service.extract_fields(text, template_id)
-            template = template_service.templates_cache.get(template_id)
+            # Check if this is an invoice2data template (ID -1)
-            if template:
+            is_invoice2data = (template_id == -1)
                vendor_id = template.get('vendor_id')
-            template_service.log_usage(template_id, file_id, True, confidence, extracted_fields)
+            if is_invoice2data:
                # Invoice2data doesn't have vendor in cache
                logger.info(f"📋 Using invoice2data template")
                # Try to find vendor from extracted CVR
                if extracted_fields.get('vendor_vat'):
                    vendor = execute_query(
                        "SELECT id FROM vendors WHERE cvr_number = %s",
                        (extracted_fields['vendor_vat'],),
                        fetchone=True
                    )
                    if vendor:
                        vendor_id = vendor['id']
                # Store invoice2data extraction in database
                extraction_id = execute_insert(
                    """INSERT INTO extractions
                       (file_id, vendor_matched_id, vendor_name, vendor_cvr, 
                        document_id, document_date, due_date, document_type, document_type_detected,
                        total_amount, currency, confidence, llm_response_json, status)
                       VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) 
                       RETURNING extraction_id""",
                    (file_id, vendor_id,
                     extracted_fields.get('issuer'),  # vendor_name
                     extracted_fields.get('vendor_vat'),  # vendor_cvr
                     str(extracted_fields.get('invoice_number')),  # document_id
                     extracted_fields.get('invoice_date'),  # document_date
                     extracted_fields.get('due_date'),
                     'invoice',  # document_type
                     'invoice',  # document_type_detected
                     extracted_fields.get('amount_total'),
                     extracted_fields.get('currency', 'DKK'),
                     1.0,  # invoice2data always 100% confidence
                     json.dumps(extracted_fields),  # llm_response_json
                     'extracted')  # status
                )
                # Insert line items if extracted
                if extracted_fields.get('lines'):
                    for idx, line in enumerate(extracted_fields['lines'], start=1):
                        execute_insert(
                            """INSERT INTO extraction_lines
                               (extraction_id, line_number, description, quantity, unit_price, 
                                line_total, vat_rate, vat_note, confidence,
                                ip_address, contract_number, location_street, location_zip, location_city)
                               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                               RETURNING line_id""",
                            (extraction_id, idx, line.get('description'), 
                             line.get('quantity'), line.get('unit_price'),
                             line.get('line_total'), None, None, 1.0,
                             line.get('ip_address'), line.get('contract_number'),
                             line.get('location_street'), line.get('location_zip'), line.get('location_city'))
                        )
                    logger.info(f"✅ Saved {len(extracted_fields['lines'])} line items")
            else:
                # Custom template from database
                template = template_service.templates_cache.get(template_id)
                if template:
                    vendor_id = template.get('vendor_id')
                template_service.log_usage(template_id, file_id, True, confidence, extracted_fields)
            # Update file - use NULL for invoice2data templates to avoid FK constraint
            db_template_id = None if is_invoice2data else template_id
            execute_update(
                """UPDATE incoming_files 
                   SET status = 'processed', template_id = %s, processed_at = CURRENT_TIMESTAMP
                   WHERE file_id = %s""",
-                (template_id, file_id)
+                (db_template_id, file_id)
            )
        else:
-            # NO AI FALLBACK - Require template matching
+            # FALLBACK TO AI EXTRACTION
-            logger.warning(f"⚠️ Ingen template match (confidence: {confidence:.0%}) - afviser fil")
+            logger.info(f"⚠️ Ingen template match (confidence: {confidence:.0%}) - bruger AI extraction")
            # Use detected vendor from quick analysis if available
            vendor_id = file_record.get('detected_vendor_id')
            # Call Ollama for full extraction
            logger.info(f"🤖 Calling Ollama for AI extraction...")
            llm_result = await ollama_service.extract_from_text(text)
            if not llm_result or 'error' in llm_result:
                error_msg = llm_result.get('error') if llm_result else 'AI extraction fejlede'
                logger.error(f"❌ AI extraction failed: {error_msg}")
                execute_update(
                    """UPDATE incoming_files 
                       SET status = 'failed', 
                           error_message = %s,
                           processed_at = CURRENT_TIMESTAMP
                       WHERE file_id = %s""",
                    (f"AI extraction fejlede: {error_msg}", file_id)
                )
                raise HTTPException(status_code=500, detail=f"AI extraction fejlede: {error_msg}")
            extracted_fields = llm_result
            confidence = llm_result.get('confidence', 0.75)
            # Store AI extracted data in extractions table
            extraction_id = execute_insert(
                """INSERT INTO supplier_invoice_extractions
                   (file_id, vendor_id, invoice_number, invoice_date, due_date,
                    total_amount, currency, document_type, confidence, llm_data)
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING extraction_id""",
                (file_id, vendor_id, 
                 llm_result.get('invoice_number'),
                 llm_result.get('invoice_date'),
                 llm_result.get('due_date'),
                 llm_result.get('total_amount'),
                 llm_result.get('currency', 'DKK'),
                 llm_result.get('document_type'),
                 confidence,
                 json.dumps(llm_result))
            )
            # Insert line items if extracted
            if llm_result.get('lines'):
                for idx, line in enumerate(llm_result['lines'], start=1):
                    execute_insert(
                        """INSERT INTO extraction_lines
                           (extraction_id, line_number, description, quantity, unit_price, 
                            line_total, vat_rate, vat_note, confidence)
                           VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                           RETURNING line_id""",
                        (extraction_id, idx, line.get('description'), 
                         line.get('quantity'), line.get('unit_price'),
                         line.get('line_total'), line.get('vat_rate'), 
                         line.get('vat_note'), confidence)
                    )
            # Update file status to ai_extracted
            execute_update(
                """UPDATE incoming_files 
-                   SET status = 'failed', 
+                   SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP
                       error_message = 'Ingen template match - opret template for denne leverandør',
                       processed_at = CURRENT_TIMESTAMP
                   WHERE file_id = %s""",
                (file_id,)
            )
-            return {
+            logger.info(f"✅ AI extraction completed for file {file_id}")
                "status": "failed",
                "file_id": file_id,
                "error": "Ingen template match - opret template for denne leverandør",
                "confidence": confidence
            }
-        # Return success with template data
+        # Return success with template data or AI extraction result
-        return {
+        result = {
            "status": "success",
            "file_id": file_id,
            "filename": file_record['filename'],
            "template_matched": template_id is not None,
            "template_id": template_id,
            "vendor_id": vendor_id,
-            "confidence": confidence if template_id else 0.8,
+            "confidence": confidence if template_id else llm_result.get('confidence', 0.75),
            "extracted_fields": extracted_fields,
            "pdf_text": text[:1000] if not template_id else text
        }
        # Add warning if no template exists
        if not template_id and vendor_id:
            vendor = execute_query(
                "SELECT name FROM vendors WHERE id = %s",
                (vendor_id,),
                fetchone=True
            )
            if vendor:
                result["warning"] = f"⚠️ Ingen template fundet for {vendor['name']} - brugte AI extraction (langsommere)"
        return result
    except HTTPException:
        raise
    except Exception as e:
@ -1866,6 +2234,7 @@ async def update_template(
    template_name: Optional[str] = None,
    detection_patterns: Optional[List[Dict]] = None,
    field_mappings: Optional[Dict] = None,
    default_product_category: Optional[str] = None,
    is_active: Optional[bool] = None
 ):
    """Opdater eksisterende template"""
@ -1884,6 +2253,9 @@ async def update_template(
        if field_mappings is not None:
            updates.append("field_mappings = %s")
            params.append(json.dumps(field_mappings))
        if default_product_category is not None:
            updates.append("default_product_category = %s")
            params.append(default_product_category)
        if is_active is not None:
            updates.append("is_active = %s")
            params.append(is_active)
@ -1911,6 +2283,114 @@ async def update_template(
        raise HTTPException(status_code=500, detail=str(e))
def _invoice2data_keyword_detection(keywords: list, pdf_text: str) -> list:
    """Report, per template keyword, whether it occurs in the PDF text (case-insensitive)."""
    haystack = pdf_text.lower()  # lower-case the text once, not once per keyword
    return [
        {
            "pattern": str(keyword),
            "type": "keyword",
            "found": str(keyword).lower() in haystack,
            "weight": 0.5
        }
        for keyword in keywords
    ]


@router.post("/supplier-invoices/templates/invoice2data/{template_name}/test")
async def test_invoice2data_template(template_name: str, request: Dict):
    """
    Test an invoice2data YAML template against PDF text.

    Request body:
    {
        "pdf_text": "Full PDF text content..."
    }

    Returns a dict with ``matched``, ``confidence``, ``extracted_fields``,
    ``line_items``, ``detection_results`` and ``template_name``; when the
    template does not match, an ``error`` message is included as well.

    Raises:
        HTTPException: 400 when ``pdf_text`` is missing or empty, 404 when the
            template name is not loaded, 500 for any other failure.
    """
    try:
        pdf_text = request.get('pdf_text', '')
        if not pdf_text:
            raise HTTPException(status_code=400, detail="pdf_text er påkrævet")

        invoice2data_service = get_invoice2data_service()

        if template_name not in invoice2data_service.templates:
            raise HTTPException(status_code=404, detail=f"Template '{template_name}' ikke fundet")

        template_data = invoice2data_service.templates[template_name]

        # Run the actual extraction; a falsy result means the template did not match.
        result = invoice2data_service.extract_with_template(pdf_text, template_name)

        # Keyword diagnostics are reported for both outcomes, so build them once.
        # `or []` also covers a template whose `keywords` entry is null.
        keywords = template_data.get('keywords') or []
        detection_results = _invoice2data_keyword_detection(keywords, pdf_text)

        if not result:
            return {
                "matched": False,
                "confidence": 0.0,
                "extracted_fields": {},
                "line_items": [],
                "detection_results": detection_results,
                "template_name": template_name,
                "error": "Template matchede ikke PDF'en"
            }

        # Normalise extracted lines; a missing or null `lines` yields no items.
        line_items = [
            {
                "line_number": line.get('line_number', ''),
                "item_number": line.get('item_number', ''),
                "description": line.get('description_raw', '') or line.get('description', ''),
                "quantity": line.get('quantity', ''),
                "unit_price": line.get('unit_price', ''),
                "line_total": line.get('line_total', ''),
                # Context fields (circuit/location info)
                "circuit_id": line.get('circuit_id', ''),
                "ip_address": line.get('ip_address', ''),
                "contract_number": line.get('contract_number', ''),
                "location_street": line.get('location_street', ''),
                "location_zip": line.get('location_zip', ''),
                "location_city": line.get('location_city', ''),
            }
            for line in (result.get('lines') or [])
        ]

        # Confidence is the share of keywords found; a template without keywords counts as 1.0.
        matched_count = sum(1 for entry in detection_results if entry["found"])
        confidence = matched_count / len(keywords) if keywords else 1.0

        # 'lines' is returned separately as line_items, so drop it here to avoid duplication.
        extracted_fields = {k: v for k, v in result.items() if k != 'lines'}

        return {
            "matched": True,
            "confidence": confidence,
            "extracted_fields": extracted_fields,
            "line_items": line_items,
            "detection_results": detection_results,
            "template_name": template_name
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Invoice2data template test failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.post("/supplier-invoices/templates/{template_id}/test")
 async def test_template(template_id: int, request: Dict):
    """
@ -2076,6 +2556,102 @@ async def test_template(template_id: int, request: Dict):
        raise HTTPException(status_code=500, detail=str(e))
@router.put("/supplier-invoices/templates/invoice2data/{template_name}/category")
async def update_yaml_category(template_name: str, request: Dict):
    """
    Update ``default_product_category`` in an invoice2data YAML template file.

    Request body:
    {
        "category": "drift"  // varesalg, drift, anlæg, abonnement, lager, udlejning
    }

    Args:
        template_name: Template file name without the ``.yml`` extension.
        request: Parsed JSON body; must contain a non-empty ``category`` key.

    Returns:
        Dict with ``message``, ``template_name`` and ``new_category``.

    Raises:
        HTTPException: 400 when the category is missing/invalid or the
            template name is not a plain file name, 404 when the YAML file
            does not exist, 500 when the file content is not a YAML mapping
            or any other error occurs.
    """
    try:
        import yaml
        from pathlib import Path

        new_category = request.get('category')
        if not new_category:
            raise HTTPException(status_code=400, detail="category er påkrævet")

        # Validate category
        valid_categories = ['varesalg', 'drift', 'anlæg', 'abonnement', 'lager', 'udlejning']
        if new_category not in valid_categories:
            raise HTTPException(status_code=400, detail=f"Ugyldig kategori. Skal være en af: {', '.join(valid_categories)}")

        # The name is interpolated into a file path below, so it must be a
        # single path component (no directory separators).
        if not template_name or Path(template_name).name != template_name:
            raise HTTPException(status_code=400, detail=f"Ugyldigt template navn: {template_name}")

        # Find YAML file (four levels up from this module, then data/invoice_templates)
        templates_dir = Path(__file__).parent.parent.parent.parent / 'data' / 'invoice_templates'
        yaml_file = templates_dir / f"{template_name}.yml"
        if not yaml_file.exists():
            raise HTTPException(status_code=404, detail=f"YAML fil ikke fundet: {template_name}.yml")

        # Load YAML
        with open(yaml_file, 'r', encoding='utf-8') as f:
            template_data = yaml.safe_load(f)

        # safe_load yields None for an empty file and may yield a list/scalar;
        # only a mapping can receive the category key.
        if not isinstance(template_data, dict):
            raise HTTPException(status_code=500, detail=f"YAML fil har ugyldigt indhold: {template_name}.yml")

        # Update category
        template_data['default_product_category'] = new_category

        # Serialize completely before opening the file for writing, so a dump
        # error cannot leave a truncated template behind. Note that yaml.dump
        # re-emits the document: key order is kept (sort_keys=False) but
        # comments and the original layout are not preserved.
        serialized = yaml.dump(template_data, default_flow_style=False, allow_unicode=True, sort_keys=False)
        with open(yaml_file, 'w', encoding='utf-8') as f:
            f.write(serialized)

        # Re-run __init__ on the service object so it reloads the templates
        invoice2data_service = get_invoice2data_service()
        invoice2data_service.__init__()  # Reinitialize to reload templates

        logger.info(f"✅ Updated category for {template_name}.yml to {new_category}")
        return {
            "message": "Kategori opdateret",
            "template_name": template_name,
            "new_category": new_category
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update YAML category: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/supplier-invoices/templates/invoice2data/{template_name}/content")
async def get_yaml_content(template_name: str):
    """
    Return the raw YAML text of an invoice2data template file.

    Args:
        template_name: Template file name without the ``.yml`` extension.

    Returns:
        Dict with ``template_name``, ``filename`` (name plus ``.yml``) and
        ``content`` (the file text decoded as UTF-8).

    Raises:
        HTTPException: 400 when the template name is not a plain file name,
            404 when the file does not exist, 500 on any other error.
    """
    try:
        from pathlib import Path

        # The name is interpolated into a file path below, so it must be a
        # single path component (no directory separators).
        if not template_name or Path(template_name).name != template_name:
            raise HTTPException(status_code=400, detail=f"Ugyldigt template navn: {template_name}")

        # Anchor the directory on this module's location (same directory that
        # update_yaml_category writes to) instead of the process working
        # directory, so reads and writes always hit the same files.
        template_dir = Path(__file__).parent.parent.parent.parent / 'data' / 'invoice_templates'
        template_file = template_dir / f"{template_name}.yml"
        if not template_file.exists():
            raise HTTPException(status_code=404, detail=f"Template fil ikke fundet: {template_name}.yml")

        # Read file content
        content = template_file.read_text(encoding='utf-8')
        return {
            "template_name": template_name,
            "filename": f"{template_name}.yml",
            "content": content
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to read YAML content: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.delete("/supplier-invoices/templates/{template_id}")
 async def delete_template(template_id: int):
    """Slet template (soft delete - sæt is_active=false)"""
--- a/app/billing/frontend/supplier_invoices.html
+++ b/app/billing/frontend/supplier_invoices.html
--- a/app/billing/frontend/template_builder.html
+++ b/app/billing/frontend/template_builder.html
@ -127,6 +127,11 @@
                    <div class="row" id="filesList">
                        <!-- Files loaded dynamically -->
                    </div>
                    <div class="mt-3 text-end">
                        <button class="btn btn-outline-secondary" onclick="skipFileSelection()">
                            Spring over <i class="bi bi-arrow-right ms-2"></i>
                        </button>
                    </div>
                </div>
            </div>
@ -156,6 +161,18 @@
                                <input type="text" class="form-control" id="templateName" placeholder="F.eks. 'BMC Standard Faktura'" required>
                                <small class="text-muted">Navn på templaten, f.eks. leverandør + "Standard" eller "Email faktura"</small>
                            </div>
                            <div class="mb-3">
                                <label class="form-label">Produktkategori <span class="text-danger">*</span></label>
                                <select class="form-select" id="productCategory" required>
                                    <option value="varesalg">🛒 Varesalg (videresalg af hardware)</option>
                                    <option value="drift">🔧 Drift (internet, hosting, cloud services)</option>
                                    <option value="anlæg">🏗️ Anlæg (investeringer, infrastruktur)</option>
                                    <option value="abonnement">📅 Abonnement (løbende services)</option>
                                    <option value="lager">📦 Lager (lagervarer)</option>
                                    <option value="udlejning">🏪 Udlejning</option>
                                </select>
                                <small class="text-muted">Standardkategori for varelinjer fra denne leverandør</small>
                            </div>
                            <button class="btn btn-primary" onclick="validateAndNextStep(3)">
                                Næste <i class="bi bi-arrow-right ms-2"></i>
                            </button>
@ -462,6 +479,137 @@ document.addEventListener('DOMContentLoaded', async () => {
    } else {
        await loadPendingFiles();
        await loadVendors();
        // Check if we're creating a template for a specific vendor/file
        const vendorIdParam = urlParams.get('vendor');
        const fileIdParam = urlParams.get('file');
        // Check for sessionStorage data (from supplier invoices page)
        const storedData = sessionStorage.getItem('templateCreateData');
        let targetFileId = fileIdParam;
        let targetVendorId = vendorIdParam;
        let targetFileName = null;
        let targetPdfText = null;
        if (storedData) {
            try {
                const data = JSON.parse(storedData);
                console.log('🔄 Loaded template creation data from sessionStorage:', data);
                // Override with sessionStorage if available
                if (data.fileId) targetFileId = data.fileId;
                if (data.vendorId) targetVendorId = data.vendorId;
                if (data.pdfText) targetPdfText = data.pdfText;
                targetFileName = data.fileName || data.vendorName || targetFileName;
                // Clear sessionStorage after use
                sessionStorage.removeItem('templateCreateData');
            } catch (error) {
                console.error('Failed to parse template creation data:', error);
            }
        }
        // If we have PDF text from sessionStorage, skip file selection
        if (targetPdfText && targetVendorId && targetFileId) {
            console.log('🚀 Fast-track: Using PDF text from sessionStorage');
            // Set up the file data directly
            currentFile = {
                file_id: targetFileId,
                filename: targetFileName || `File ${targetFileId}`,
                text: targetPdfText
            };
            pdfText = targetPdfText;
            // Wait for vendors to load
            setTimeout(() => {
                // Pre-select vendor
                const vendorSelect = document.getElementById('vendorSelect');
                if (vendorSelect) {
                    vendorSelect.value = targetVendorId;
                    console.log('✅ Vendor pre-selected:', targetVendorId);
                }
                // Auto-generate template name
                const templateNameInput = document.getElementById('templateName');
                if (templateNameInput && !templateNameInput.value) {
                    const vendorName = vendorSelect?.options[vendorSelect.selectedIndex]?.text || 'Template';
                    templateNameInput.value = `${vendorName} Standard Template`;
                    console.log('✅ Template name generated:', templateNameInput.value);
                }
                // Show PDF preview in step 2
                document.getElementById('pdfPreview2').textContent = pdfText;
                // Go directly to step 2
                console.log('🎯 Jumping to step 2 (vendor & template name)');
                nextStep(2);
                // After a moment, auto-advance to step 3
                setTimeout(() => {
                    console.log('🚀 Auto-advancing to step 3 (pattern definition)');
                    validateAndNextStep(3);
                }, 500);
            }, 500);
        }
        // If we have a target file but no PDF text, try to select from pending list
        else if (targetFileId) {
            console.log(`🎯 Auto-selecting file ${targetFileId} (${targetFileName || 'unknown'})`);
            // Wait for files to load, then auto-select
            setTimeout(async () => {
                try {
                    // First check if file exists in the loaded files
                    const filesList = document.getElementById('filesList');
                    console.log('📋 Files list HTML:', filesList.innerHTML.substring(0, 200));
                    // Try to select the file
                    console.log('🔄 Calling selectFile...');
                    await selectFile(parseInt(targetFileId), targetFileName || `File ${targetFileId}`);
                    console.log('✅ selectFile completed');
                    // After file is selected, pre-select vendor if available
                    if (targetVendorId) {
                        console.log(`🎯 Pre-selecting vendor ${targetVendorId}`);
                        // Wait a bit for step 2 to render
                        setTimeout(() => {
                            const vendorSelect = document.getElementById('vendorSelect');
                            if (!vendorSelect) {
                                console.error('❌ vendorSelect not found!');
                                return;
                            }
                            vendorSelect.value = targetVendorId;
                            console.log('✅ Vendor selected:', vendorSelect.value);
                            // If both file and vendor are set, auto-advance to step 3
                            setTimeout(() => {
                                const templateNameInput = document.getElementById('templateName');
                                if (!templateNameInput) {
                                    console.error('❌ templateName input not found!');
                                    return;
                                }
                                if (!templateNameInput.value) {
                                    // Auto-generate template name if empty
                                    const vendorName = vendorSelect.options[vendorSelect.selectedIndex]?.text || 'Template';
                                    templateNameInput.value = `${vendorName} Standard Template`;
                                    console.log('✅ Template name set:', templateNameInput.value);
                                }
                                console.log('🚀 Auto-advancing to step 3 (pattern definition)');
                                validateAndNextStep(3);
                            }, 300);
                        }, 300);
                    }
                } catch (error) {
                    console.error('❌ Failed to auto-select file:', error);
                    alert('Kunne ikke auto-vælge fil: ' + error.message);
                }
            }, 1000); // Increased timeout to 1 second
        }
    }
 });
@ -498,6 +646,11 @@ async function loadExistingTemplate(templateId) {
        await loadVendors();
        document.getElementById('vendorSelect').value = template.vendor_id;
        // Set product category
        if (template.default_product_category) {
            document.getElementById('productCategory').value = template.default_product_category;
        }
        // Load detection patterns
        detectionPatterns = template.detection_patterns || [];
@ -727,30 +880,63 @@ async function loadVendors() {
 /**
  * Load the extracted PDF text for a file and advance the wizard to step 2.
  *
  * Fetches `/api/v1/supplier-invoices/files/{fileId}/pdf-text`, stores the
  * result in the outer `currentFile` and `pdfText` variables (empty string
  * when the response has no `pdf_text`), fills the `#pdfPreview` element if
  * it exists, then calls `nextStep(2)`. Any failure (including a non-OK HTTP
  * status) is logged and shown to the user via `alert`; it is not re-thrown.
  *
  * @param fileId   Identifier of the file, interpolated into the request URL.
  * @param filename Display name stored on `currentFile`.
  */
 async function selectFile(fileId, filename) {
    try {
-        // Reprocess file to get PDF text
+        console.log(`🔄 Selecting file: ${fileId} (${filename})`);
-        const response = await fetch(`/api/v1/supplier-invoices/reprocess/${fileId}`, {
+        
-            method: 'POST'
+        // Get PDF text directly (fast endpoint, no AI processing)
-        });
+        console.log(`📡 Fetching: /api/v1/supplier-invoices/files/${fileId}/pdf-text`);
        const response = await fetch(`/api/v1/supplier-invoices/files/${fileId}/pdf-text`);
        console.log(`📥 Response status: ${response.status}`);
        if (!response.ok) {
            const errorText = await response.text();
            console.error(`❌ HTTP error: ${response.status} - ${errorText}`);
            throw new Error(`HTTP ${response.status}: ${errorText}`);
        }
        const data = await response.json();
        console.log('📦 Response data:', data);
        if (!data.pdf_text) {
            console.warn('⚠️ No PDF text in response');
        }
        currentFile = {
            file_id: fileId,
            filename: filename,
-            text: data.pdf_text
+            text: data.pdf_text || ''
        };
-        pdfText = data.pdf_text;
+        pdfText = data.pdf_text || '';
        console.log(`✅ File loaded, PDF text length: ${pdfText.length} chars`);
        // Show PDF preview
-        document.getElementById('pdfPreview').textContent = pdfText;
+        const pdfPreview = document.getElementById('pdfPreview');
        if (pdfPreview) {
            pdfPreview.textContent = pdfText;
        }
        console.log('🚀 Advancing to step 2');
        nextStep(2);
    } catch (error) {
-        console.error('Failed to load file:', error);
+        console.error('❌ Failed to load file:', error);
-        alert('Kunne ikke hente fil');
+        alert('Kunne ikke hente fil: ' + error.message);
    }
 }
 /**
  * Continue to step 2 without a selected file.
  * The user can still upload or paste PDF text later in the flow.
  */
 function skipFileSelection() {
    console.log('⏭️ Skipping file selection');
    // Reset both pieces of file state so nothing from an earlier selection lingers
    pdfText = '';
    currentFile = null;
    nextStep(2);
 }
 function validateAndNextStep(targetStep) {
    // Validate step 2 fields
    if (targetStep === 3) {
@ -1289,8 +1475,9 @@ async function autoGenerateTemplate() {
 async function saveTemplate() {
    const vendorId = document.getElementById('vendorSelect').value;
    const templateName = document.getElementById('templateName').value;
    const productCategory = document.getElementById('productCategory').value;
-    console.log('Saving template...', { vendorId, templateName, editingTemplateId });
+    console.log('Saving template...', { vendorId, templateName, productCategory, editingTemplateId });
    console.log('Detection patterns:', detectionPatterns);
    console.log('Field patterns:', fieldPatterns);
@ -1299,6 +1486,11 @@ async function saveTemplate() {
        return;
    }
    if (!productCategory) {
        alert('Vælg produktkategori');
        return;
    }
    if (detectionPatterns.length === 0) {
        alert('Tilføj mindst ét detektionsmønster');
        return;
@ -1378,6 +1570,7 @@ async function saveTemplate() {
            body: JSON.stringify({
                vendor_id: parseInt(vendorId),
                template_name: templateName,
                default_product_category: productCategory,
                detection_patterns: detectionPatternsData,
                field_mappings: fieldMappings
            })
--- a/app/billing/frontend/templates_list.html
+++ b/app/billing/frontend/templates_list.html
@ -56,12 +56,9 @@
 <div class="container mt-4">
    <div class="d-flex justify-content-between align-items-center mb-4">
        <div>
-            <h2><i class="bi bi-grid-3x3 me-2"></i>Faktura Templates</h2>
+            <h2><i class="bi bi-file-earmark-code me-2"></i>Invoice2Data Templates (YAML)</h2>
-            <p class="text-muted">Administrer templates til automatisk faktura-udtrækning</p>
+            <p class="text-muted">YAML-baserede templates til automatisk faktura-udtrækning</p>
        </div>
        <a href="/billing/template-builder" class="btn btn-primary">
            <i class="bi bi-plus-circle me-2"></i>Ny Template
        </a>
    </div>
    <div id="templatesList" class="row">
@ -69,6 +66,63 @@
    </div>
 </div>
 <!-- Edit YAML Category Modal -->
 <div class="modal fade" id="editYamlCategoryModal" tabindex="-1">
    <div class="modal-dialog">
        <div class="modal-content">
            <div class="modal-header">
                <h5 class="modal-title">
                    <i class="bi bi-pencil me-2"></i>Rediger Kategori: <span id="yamlTemplateName"></span>
                </h5>
                <button type="button" class="btn-close" data-bs-dismiss="modal"></button>
            </div>
            <div class="modal-body">
                <div class="mb-3">
                    <label class="form-label">Produkt Kategori</label>
                    <select class="form-select" id="yamlCategorySelect">
                        <option value="varesalg">🛒 Varesalg</option>
                        <option value="drift">🔧 Drift</option>
                        <option value="anlæg">🏗️ Anlæg</option>
                        <option value="abonnement">📅 Abonnement</option>
                        <option value="lager">📦 Lager</option>
                        <option value="udlejning">🏪 Udlejning</option>
                    </select>
                </div>
                <div class="alert alert-info">
                    <i class="bi bi-info-circle me-2"></i>
                    <small>Dette ændrer default_product_category i YAML filen. Filen bliver opdateret på serveren.</small>
                </div>
            </div>
            <div class="modal-footer">
                <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Annuller</button>
                <button type="button" class="btn btn-primary" onclick="saveYamlCategory()">
                    <i class="bi bi-save me-2"></i>Gem Kategori
                </button>
            </div>
        </div>
    </div>
 </div>
 <!-- View YAML Content Modal -->
 <div class="modal fade" id="viewYamlModal" tabindex="-1">
    <div class="modal-dialog modal-lg">
        <div class="modal-content">
            <div class="modal-header">
                <h5 class="modal-title">
                    <i class="bi bi-file-earmark-code me-2"></i>YAML Indhold: <span id="viewYamlTemplateName"></span>
                </h5>
                <button type="button" class="btn-close" data-bs-dismiss="modal"></button>
            </div>
            <div class="modal-body">
                <pre id="yamlContent" style="background: #f8f9fa; padding: 15px; border-radius: 8px; max-height: 600px; overflow-y: auto;"><code></code></pre>
            </div>
            <div class="modal-footer">
                <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Luk</button>
            </div>
        </div>
    </div>
 </div>
 <!-- Test Modal -->
 <div class="modal fade test-modal" id="testModal" tabindex="-1">
    <div class="modal-dialog modal-xl">
@ -116,6 +170,7 @@
 <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js"></script>
 <script>
 let currentTemplateId = null;
 let currentTemplateIsInvoice2data = false;
 document.addEventListener('DOMContentLoaded', async () => {
    await loadTemplates();
@ -142,36 +197,65 @@ async function loadTemplates() {
            return;
        }
-        templates.forEach(template => {
+        // Filter to only show invoice2data templates
        const invoice2dataTemplates = templates.filter(t => t.template_type === 'invoice2data');
        if (invoice2dataTemplates.length === 0) {
            container.innerHTML = `
                <div class="col-12">
                    <div class="alert alert-info">
                        <i class="bi bi-info-circle me-2"></i>
                        Ingen YAML templates endnu. Opret .yml filer i <code>data/invoice_templates/</code>
                    </div>
                </div>
            `;
            return;
        }
        invoice2dataTemplates.forEach(template => {
            const detectionPatterns = template.detection_patterns || [];
            const fieldMappings = template.field_mappings || {};
            const fieldCount = Object.keys(fieldMappings).filter(k => !['lines_start', 'lines_end', 'line_item'].includes(k)).length;
            const category = template.default_product_category || 'varesalg';
            const categoryIcons = {
                'varesalg': '🛒',
                'drift': '🔧', 
                'anlæg': '🏗️',
                'abonnement': '📅',
                'lager': '📦',
                'udlejning': '🏪'
            };
            const categoryIcon = categoryIcons[category] || '📦';
            container.innerHTML += `
                <div class="col-md-4 mb-3">
                    <div class="card template-card">
                        <div class="card-body">
-                            <h5 class="card-title">
+                            <div class="d-flex justify-content-between align-items-start mb-2">
-                                <i class="bi bi-file-text me-2"></i>${template.template_name}
+                                <h5 class="card-title mb-0">
-                            </h5>
+                                    <i class="bi bi-file-earmark-code me-2"></i>${template.template_name}
                                </h5>
                                <span class="badge bg-success">YAML</span>
                            </div>
                            <p class="card-text text-muted mb-2">
                                <small>
-                                    <i class="bi bi-building me-1"></i>${template.vendor_name || 'Ingen leverandør'}<br>
+                                    <i class="bi bi-building me-1"></i>${template.vendor_name || 'Ingen leverandør'}
-                                    <i class="bi bi-check-circle me-1"></i>${detectionPatterns.length} detektionsmønstre<br>
+                                    ${template.vendor_cvr ? `<br><i class="bi bi-hash me-1"></i>CVR: ${template.vendor_cvr}` : ''}
-                                    <i class="bi bi-input-cursor me-1"></i>${fieldCount} felter<br>
+                                    <br><i class="bi bi-check-circle me-1"></i>${detectionPatterns.length} detektionsmønstre
-                                    <i class="bi bi-graph-up me-1"></i>${template.usage_count || 0} gange brugt
+                                    <br><i class="bi bi-input-cursor me-1"></i>${fieldCount} felter
                                    <br><strong>${categoryIcon} Kategori: ${category}</strong>
                                </small>
                            </p>
-                            <div class="d-flex gap-2">
+                            <div class="d-flex gap-2 flex-wrap">
-                                <button class="btn btn-sm btn-primary" onclick="editTemplate(${template.template_id})">
+                                <button class="btn btn-sm btn-primary" onclick="viewYamlContent('${template.yaml_filename}')" title="Vis YAML indhold">
-                                    <i class="bi bi-pencil"></i> Rediger
+                                    <i class="bi bi-file-earmark-code"></i> Vis YAML
                                </button>
-                                <button class="btn btn-sm btn-info" onclick="openTestModal(${template.template_id}, '${template.template_name}')">
+                                <button class="btn btn-sm btn-warning" onclick="editYamlCategory('${template.yaml_filename}', '${category}')" title="Rediger kategori">
                                    <i class="bi bi-pencil"></i> Kategori
                                </button>
                                <button class="btn btn-sm btn-info" onclick="openTestModal('${template.yaml_filename}', '${template.template_name}', true, ${template.vendor_id || 'null'})">
                                    <i class="bi bi-flask"></i> Test
                                </button>
                                <button class="btn btn-sm btn-danger" onclick="deleteTemplate(${template.template_id})">
                                    <i class="bi bi-trash"></i>
                                </button>
                            </div>
                        </div>
                    </div>
@ -211,22 +295,31 @@ async function loadPendingFiles(vendorId = null) {
    }
 }
-async function openTestModal(templateId, templateName) {
+async function openTestModal(templateId, templateName, isInvoice2data = false, vendorId = null) {
    currentTemplateId = templateId;
    currentTemplateIsInvoice2data = isInvoice2data;
    document.getElementById('modalTemplateName').textContent = templateName;
    document.getElementById('testResultsContainer').classList.add('d-none');
    document.getElementById('testFileSelect').value = '';
-    // Load template to get vendor_id
+    // For invoice2data templates, use vendorId if provided
-    try {
+    if (isInvoice2data && vendorId) {
-        const response = await fetch(`/api/v1/supplier-invoices/templates/${templateId}`);
+        await loadPendingFiles(vendorId);
-        const template = await response.json();
+    } else if (!isInvoice2data) {
        // Load database template to get vendor_id
        try {
            const response = await fetch(`/api/v1/supplier-invoices/templates/${templateId}`);
            const template = await response.json();
-        // Reload files filtered by this template's vendor
+            // Reload files filtered by this template's vendor
-        await loadPendingFiles(template.vendor_id);
+            await loadPendingFiles(template.vendor_id);
-    } catch (error) {
+        } catch (error) {
-        console.error('Failed to load template:', error);
+            console.error('Failed to load template:', error);
-        await loadPendingFiles(); // Fallback to all files
+            await loadPendingFiles(); // Fallback to all files
        }
    } else {
        // No vendor - load all files
        await loadPendingFiles();
    }
    const modal = new bootstrap.Modal(document.getElementById('testModal'));
@ -258,8 +351,15 @@ async function runTest() {
        document.getElementById('testPdfPreview').textContent = pdfText;
        document.getElementById('testResultsContainer').classList.remove('d-none');
-        // Test template
+        // Test template - use different endpoint based on type
-        const testResponse = await fetch(`/api/v1/supplier-invoices/templates/${currentTemplateId}/test`, {
+        let testUrl;
        if (currentTemplateIsInvoice2data) {
            testUrl = `/api/v1/supplier-invoices/templates/invoice2data/${currentTemplateId}/test`;
        } else {
            testUrl = `/api/v1/supplier-invoices/templates/${currentTemplateId}/test`;
        }
        const testResponse = await fetch(testUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ pdf_text: pdfText })
@ -303,21 +403,26 @@ async function runTest() {
                        <thead>
                            <tr>
                                <th>#</th>
                                ${lineItems[0].item_number ? '<th>Varenr</th>' : ''}
                                ${lineItems[0].description ? '<th>Beskrivelse</th>' : ''}
                                ${lineItems[0].quantity ? '<th>Antal</th>' : ''}
                                ${lineItems[0].unit_price ? '<th>Pris</th>' : ''}
                                ${lineItems.some(l => l.circuit_id || l.ip_address) ? '<th>Kredsløb/IP</th>' : ''}
                                ${lineItems.some(l => l.location_street) ? '<th>Adresse</th>' : ''}
                            </tr>
                        </thead>
                        <tbody>`;
-            lineItems.forEach(line => {
+            lineItems.forEach((line, idx) => {
                const locationText = [line.location_street, line.location_zip, line.location_city].filter(x => x).join(' ');
                const circuitText = line.circuit_id || line.ip_address || '';
                linesHtml += `<tr>
-                    <td>${line.line_number}</td>
+                    <td>${idx + 1}</td>
                    ${line.item_number ? `<td>${line.item_number}</td>` : ''}
                    ${line.description ? `<td>${line.description}</td>` : ''}
                    ${line.quantity ? `<td>${line.quantity}</td>` : ''}
                    ${line.unit_price ? `<td>${line.unit_price}</td>` : ''}
                    ${lineItems.some(l => l.circuit_id || l.ip_address) ? `<td><small>${circuitText}</small></td>` : ''}
                    ${lineItems.some(l => l.location_street) ? `<td><small>${locationText}</small></td>` : ''}
                </tr>`;
            });
@ -362,32 +467,65 @@ async function runTest() {
    }
 }
-async function deleteTemplate(templateId) {
+let currentYamlTemplate = null;
-    if (!confirm('Er du sikker på at du vil slette denne template?')) {
+
 /**
  * Fetch the raw YAML of an invoice2data template and show it in the
  * "View YAML" modal.
  *
  * Writes `<yamlFilename>.yml` into `#viewYamlTemplateName` and the returned
  * `content` into the `<code>` element inside `#yamlContent`, then opens
  * `#viewYamlModal`. Failures are logged and reported with `alert`; nothing
  * is re-thrown.
  *
  * @param yamlFilename Template file name without the `.yml` extension.
  */
 async function viewYamlContent(yamlFilename) {
    try {
        // Encode the name so characters such as spaces, '#', '?' or '%'
        // stay inside this one path segment instead of corrupting the URL.
        const url = `/api/v1/supplier-invoices/templates/invoice2data/${encodeURIComponent(yamlFilename)}/content`;
        const response = await fetch(url);
        if (!response.ok) {
            throw new Error('Kunne ikke hente YAML indhold');
        }
        const data = await response.json();
        document.getElementById('viewYamlTemplateName').textContent = yamlFilename + '.yml';
        document.getElementById('yamlContent').querySelector('code').textContent = data.content;
        // Reuse the modal instance already bound to the element (if any)
        // rather than constructing a new one on every click.
        const modal = bootstrap.Modal.getOrCreateInstance(document.getElementById('viewYamlModal'));
        modal.show();
    } catch (error) {
        console.error('Failed to load YAML content:', error);
        alert('❌ Kunne ikke hente YAML indhold');
    }
 }
 /**
  * Open the "edit category" modal for one YAML template.
  *
  * @param yamlFilename    Template file name without the `.yml` extension.
  * @param currentCategory Category value to pre-select in the dropdown.
  */
 function editYamlCategory(yamlFilename, currentCategory) {
    // Remember the template being edited; saveYamlCategory() reads this variable
    currentYamlTemplate = yamlFilename;

    // Modal title shows the full file name
    const titleSpan = document.getElementById('yamlTemplateName');
    titleSpan.textContent = `${yamlFilename}.yml`;

    // Pre-select the template's current category
    const categoryDropdown = document.getElementById('yamlCategorySelect');
    categoryDropdown.value = currentCategory;

    new bootstrap.Modal(document.getElementById('editYamlCategoryModal')).show();
 }
 /**
  * Persist the category chosen in the "edit category" modal.
  *
  * Reads the selected value from `#yamlCategorySelect` and, if
  * `currentYamlTemplate` is set, sends it as `{ category }` in a JSON PUT to
  * the template's `/category` endpoint. On an OK response the user is
  * notified, the modal is hidden and the template list is reloaded. On a
  * non-OK response the server's `detail` field (when present) becomes the
  * error message. All errors are logged and shown via `alert`.
  */
 async function saveYamlCategory() {
    const newCategory = document.getElementById('yamlCategorySelect').value;
    if (!currentYamlTemplate) {
        alert('Ingen template valgt');
        return;
    }
    try {
-        const response = await fetch(`/api/v1/supplier-invoices/templates/${templateId}`, {
+        const response = await fetch(`/api/v1/supplier-invoices/templates/invoice2data/${currentYamlTemplate}/category`, {
-            method: 'DELETE'
+            method: 'PUT',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ category: newCategory })
        });
        if (response.ok) {
-            alert('✅ Template slettet');
+            alert('✅ Kategori opdateret i YAML fil');
-            await loadTemplates();
+            bootstrap.Modal.getInstance(document.getElementById('editYamlCategoryModal')).hide();
            await loadTemplates(); // Reload to show new category
        } else {
-            throw new Error('Sletning fejlede');
+            const error = await response.json();
            throw new Error(error.detail || 'Opdatering fejlede');
        }
    } catch (error) {
-        console.error('Delete failed:', error);
+        console.error('Category update failed:', error);
-        alert('❌ Kunne ikke slette template');
+        alert('❌ Kunne ikke opdatere kategori: ' + error.message);
    }
 }
 /**
  * Navigate to the template builder page for an existing template.
  *
  * @param templateId Value passed to the builder in the `id` query parameter.
  */
 function editTemplate(templateId) {
    const builderUrl = `/billing/template-builder?id=${templateId}`;
    window.location.href = builderUrl;
 }
 </script>
 </body>
--- a/app/services/economic_service.py
+++ b/app/services/economic_service.py
@ -271,6 +271,54 @@ class EconomicService:
    # ========== KASSEKLADDE (JOURNALS/VOUCHERS) ==========
    async def check_invoice_number_exists(self, invoice_number: str, journal_number: Optional[int] = None) -> Optional[Dict]:
        """
        Check if an invoice number already appears in e-conomic vouchers.

        This is a best-effort lookup: a non-200 response or any exception is
        logged and reported as "not found", so e-conomic problems never block
        the caller.

        Args:
            invoice_number: Invoice number to check. Blank values are never
                looked up and always yield None.
            journal_number: Intended to restrict the search to one journal.
                Accepted for API compatibility but currently not applied.

        Returns:
            Dict with keys found_in, voucher_number, date and journal for the
            first matching voucher, or None when nothing matched or the
            lookup failed.
        """
        invoice_number = str(invoice_number).strip() if invoice_number is not None else ''
        if not invoice_number:
            # An empty string is a substring of every voucher, so searching
            # for it would report the first voucher as a false duplicate.
            return None
        try:
            # Search in vouchers (posted journal entries)
            url = f"{self.api_url}/vouchers"
            params = {
                # NOTE(review): e-conomic REST filters are normally written
                # "<field>$<operator>:<value>" (e.g. voucherNumber$eq:123);
                # confirm that the API accepts this expression.
                'filter': f'voucherNumber${invoice_number}',
                'pagesize': 100
            }
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=self._get_headers(), params=params) as response:
                    if response.status != 200:
                        logger.warning(f"⚠️ Failed to search vouchers: {response.status}")
                        return None
                    data = await response.json()
                    vouchers = data.get('collection') or []
                    for voucher in vouchers:
                        # Substring match against the whole voucher payload.
                        # NOTE(review): short numbers can also match dates or
                        # amounts; consider comparing specific fields instead.
                        if invoice_number in str(voucher):
                            logger.warning(f"⚠️ Invoice number {invoice_number} found in e-conomic voucher #{voucher.get('voucherNumber')}")
                            # "journal" may be missing or explicitly null.
                            journal = voucher.get('journal') or {}
                            return {
                                'found_in': 'e-conomic',
                                'voucher_number': voucher.get('voucherNumber'),
                                'date': voucher.get('date'),
                                'journal': journal.get('journalNumber')
                            }
            logger.info(f"✅ Invoice number {invoice_number} not found in e-conomic")
            return None
        except Exception as e:
            logger.error(f"❌ Error checking invoice number in e-conomic: {e}")
            # Don't block on e-conomic errors - assume not found
            return None
    async def get_supplier_invoice_journals(self) -> list:
        """
        Get all available journals for supplier invoices (kassekladde)
--- a/app/services/invoice2data_service.py
+++ b/app/services/invoice2data_service.py
@ -0,0 +1,337 @@
 """
 Invoice2Data Service
 Wrapper around invoice2data library for template-based invoice extraction
 """
 import logging
 import re
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Any
 import yaml
 logger = logging.getLogger(__name__)
 class Invoice2DataService:
    """Service for extracting invoice data using invoice2data templates"""
    def __init__(self):
        self.template_dir = Path(__file__).parent.parent.parent / "data" / "invoice_templates"
        self.templates = self._load_templates()
        logger.info(f"📋 Loaded {len(self.templates)} invoice2data templates")
    def _load_templates(self) -> Dict[str, Dict]:
        """Load all YAML templates from template directory"""
        templates = {}
        if not self.template_dir.exists():
            logger.warning(f"Template directory not found: {self.template_dir}")
            return templates
        for template_file in self.template_dir.glob("*.yml"):
            try:
                with open(template_file, 'r', encoding='utf-8') as f:
                    template_data = yaml.safe_load(f)
                    template_name = template_file.stem
                    templates[template_name] = template_data
                    logger.debug(f"  ✓ Loaded template: {template_name}")
            except Exception as e:
                logger.error(f"  ✗ Failed to load template {template_file}: {e}")
        return templates
    def match_template(self, text: str) -> Optional[str]:
        """
        Find matching template based on keywords
        Returns template name or None
        """
        text_lower = text.lower()
        for template_name, template_data in self.templates.items():
            keywords = template_data.get('keywords', [])
            # Check if all keywords are present
            matches = sum(1 for keyword in keywords if str(keyword).lower() in text_lower)
            if matches >= len(keywords) * 0.7:  # 70% of keywords must match
                logger.info(f"✅ Matched template: {template_name} ({matches}/{len(keywords)} keywords)")
                return template_name
        logger.warning("⚠️ No template matched")
        return None
    def extract_with_template(self, text: str, template_name: str) -> Dict[str, Any]:
        """
        Extract invoice data using specific template
        """
        if template_name not in self.templates:
            raise ValueError(f"Template not found: {template_name}")
        template = self.templates[template_name]
        fields = template.get('fields', {})
        options = template.get('options', {})
        extracted = {
            'template': template_name,
            'issuer': template.get('issuer'),
            'country': template.get('country'),
            'currency': options.get('currency', 'DKK')
        }
        # Extract each field using its regex
        for field_name, field_config in fields.items():
            if field_config.get('parser') != 'regex':
                continue
            pattern = field_config.get('regex')
            field_type = field_config.get('type', 'string')
            group = field_config.get('group', 1)
            try:
                match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
                if match:
                    value = match.group(group).strip()
                    logger.debug(f"  🔍 Extracted raw value for {field_name}: '{value}' (type: {field_type})")
                    # Handle CVR filtering (avoid customer CVR)
                    if field_name == 'vendor_vat':
                        # Find ALL CVR numbers
                        all_cvr_matches = re.finditer(r'SE/CVR-nr\.\s+(\d{8})', text, re.IGNORECASE)
                        cvr_numbers = [m.group(1) for m in all_cvr_matches]
                        # Filter out BMC's CVR (29522790)
                        vendor_cvrs = [cvr for cvr in cvr_numbers if cvr != '29522790']
                        if vendor_cvrs:
                            value = vendor_cvrs[0]
                            logger.debug(f"  ✓ {field_name}: {value} (filtered from {cvr_numbers})")
                        else:
                            logger.warning(f"  ⚠️ Only customer CVR found, no vendor CVR")
                            continue
                    # Convert type
                    if field_type == 'float':
                        # Handle Danish number format (1.234,56 → 1234.56)
                        # OR (148,587.98 → 148587.98) - handle both formats
                        decimal_sep = options.get('decimal_separator', ',')
                        thousands_sep = options.get('thousands_separator', '.')
                        # Remove all spaces first
                        value = value.replace(' ', '')
                        # If both separators are present, we can determine the format
                        # Danish: 148.587,98 (thousands=., decimal=,)
                        # English: 148,587.98 (thousands=, decimal=.)
                        if thousands_sep in value and decimal_sep in value:
                            # Remove thousands separator, then convert decimal separator to .
                            value = value.replace(thousands_sep, '').replace(decimal_sep, '.')
                        elif thousands_sep in value:
                            # Only thousands separator present - just remove it
                            value = value.replace(thousands_sep, '')
                        elif decimal_sep in value and decimal_sep == ',':
                            # Only decimal separator and it's Danish comma - convert to .
                            value = value.replace(',', '.')
                        value = float(value)
                    elif field_type == 'int':
                        value = int(value)
                    elif field_type == 'date':
                        # Try to parse Danish dates
                        date_formats = options.get('date_formats', ['%B %d, %Y', '%d-%m-%Y'])
                        # Danish month names
                        value = value.replace('januar', 'January').replace('februar', 'February')
                        value = value.replace('marts', 'March').replace('april', 'April')
                        value = value.replace('maj', 'May').replace('juni', 'June')
                        value = value.replace('juli', 'July').replace('august', 'August')
                        value = value.replace('september', 'September').replace('oktober', 'October')
                        value = value.replace('november', 'November').replace('december', 'December')
                        for date_format in date_formats:
                            try:
                                parsed_date = datetime.strptime(value, date_format)
                                value = parsed_date.strftime('%Y-%m-%d')
                                break
                            except ValueError:
                                continue
                    extracted[field_name] = value
                    logger.debug(f"  ✓ {field_name}: {value}")
                else:
                    logger.debug(f"  ✗ {field_name}: No match")
            except Exception as e:
                logger.warning(f"  ✗ Failed to extract {field_name}: {e}")
        # Extract line items if defined in template
        lines_config = template.get('lines', [])
        if lines_config:
            extracted['lines'] = self._extract_lines(text, lines_config, options)
        return extracted
    def _extract_lines(self, text: str, lines_configs: List[Dict], options: Dict) -> List[Dict]:
        """Extract line items from invoice text"""
        all_lines = []
        logger.debug(f"🔍 Extracting lines with {len(lines_configs)} configurations")
        for lines_config in lines_configs:
            start_pattern = lines_config.get('start')
            end_pattern = lines_config.get('end')
            line_config = lines_config.get('line', {})
            if not start_pattern or not line_config:
                continue
            try:
                # Find section between start and end patterns
                if end_pattern:
                    section_pattern = f"{start_pattern}(.*?){end_pattern}"
                    section_match = re.search(section_pattern, text, re.DOTALL | re.IGNORECASE)
                else:
                    section_pattern = f"{start_pattern}(.*?)$"
                    section_match = re.search(section_pattern, text, re.DOTALL | re.IGNORECASE)
                if not section_match:
                    logger.debug(f"  ✗ Line section not found (start: {start_pattern[:50]}, end: {end_pattern[:50] if end_pattern else 'None'})")
                    continue
                section_text = section_match.group(1)
                logger.debug(f"  ✓ Found line section ({len(section_text)} chars)")
                # Extract individual lines
                line_pattern = line_config.get('regex')
                field_names = line_config.get('fields', [])
                field_types = line_config.get('types', {})
                context_config = line_config.get('context_before', {})
                if not line_pattern or not field_names:
                    continue
                # Split section into lines for context processing
                section_lines = section_text.split('\n')
                line_matches = []
                # Find all matching lines with their indices
                for line_idx, line_text in enumerate(section_lines):
                    match = re.search(line_pattern, line_text, re.MULTILINE)
                    if match:
                        line_matches.append((line_idx, line_text, match))
                logger.debug(f"  ✓ Found {len(line_matches)} matching lines")
                for line_idx, line_text, match in line_matches:
                    line_data = {}
                    # Extract main line fields
                    for idx, field_name in enumerate(field_names, start=1):
                        try:
                            value = match.group(idx).strip()
                            field_type = field_types.get(field_name, 'string')
                            # Convert type
                            if field_type == 'float':
                                thousands_sep = options.get('thousands_separator', ',')
                                decimal_sep = options.get('decimal_separator', '.')
                                value = value.replace(' ', '')
                                if thousands_sep in value and decimal_sep in value:
                                    value = value.replace(thousands_sep, '').replace(decimal_sep, '.')
                                elif thousands_sep in value:
                                    value = value.replace(thousands_sep, '')
                                elif decimal_sep in value and decimal_sep == ',':
                                    value = value.replace(',', '.')
                                value = float(value)
                            elif field_type == 'int':
                                value = int(value)
                            line_data[field_name] = value
                        except Exception as e:
                            logger.debug(f"  ✗ Failed to extract line field {field_name}: {e}")
                    # Extract context_before if configured
                    if context_config and line_idx > 0:
                        max_lines = context_config.get('max_lines', 5)
                        patterns = context_config.get('patterns', [])
                        # Look at lines BEFORE this line
                        start_idx = max(0, line_idx - max_lines)
                        context_lines = section_lines[start_idx:line_idx]
                        for pattern_config in patterns:
                            pattern_regex = pattern_config.get('regex')
                            pattern_fields = pattern_config.get('fields', [])
                            if not pattern_regex or not pattern_fields:
                                continue
                            # Try to match against context lines (most recent first)
                            for ctx_line in reversed(context_lines):
                                ctx_match = re.search(pattern_regex, ctx_line)
                                if ctx_match:
                                    # Extract fields from context
                                    for ctx_idx, ctx_field_name in enumerate(pattern_fields, start=1):
                                        try:
                                            ctx_value = ctx_match.group(ctx_idx).strip()
                                            line_data[ctx_field_name] = ctx_value
                                        except Exception as e:
                                            logger.debug(f"  ✗ Failed to extract context field {ctx_field_name}: {e}")
                                    break  # Stop after first match for this pattern
                    if line_data:
                        all_lines.append(line_data)
                logger.info(f"  ✓ Extracted {len(all_lines)} line items")
            except Exception as e:
                logger.warning(f"  ✗ Failed to extract lines: {e}")
        return all_lines
    def extract(self, text: str, template_name: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """
        Extract invoice data from text
        If template_name is None, auto-detect template
        """
        try:
            # Auto-detect template if not specified
            if template_name is None:
                template_name = self.match_template(text)
                if template_name is None:
                    return None
            # Extract with template
            result = self.extract_with_template(text, template_name)
            logger.info(f"✅ Extracted {len(result)} fields using template: {template_name}")
            return result
        except Exception as e:
            logger.error(f"❌ Extraction failed: {e}")
            return None
    def get_template_list(self) -> List[Dict[str, str]]:
        """Get list of available templates"""
        return [
            {
                'name': name,
                'issuer': template.get('issuer'),
                'country': template.get('country')
            }
            for name, template in self.templates.items()
        ]
 # Singleton instance
 _invoice2data_service = None
 def get_invoice2data_service() -> Invoice2DataService:
    """Return the shared Invoice2DataService, creating it on first use."""
    global _invoice2data_service
    if _invoice2data_service is not None:
        return _invoice2data_service
    # First call: build the service once and keep it for later callers.
    _invoice2data_service = Invoice2DataService()
    return _invoice2data_service
--- a/app/services/ollama_service.py
+++ b/app/services/ollama_service.py
@ -437,6 +437,130 @@ Output: {
        }
        return mime_types.get(suffix, 'application/octet-stream')
    async def quick_analysis_on_upload(self, pdf_text: str) -> Dict:
        """
        Run a fast, regex-only analysis of the text of an uploaded PDF.

        Only the critical fields are extracted:
        - CVR number (and the matching vendor, when one is found)
        - Document type (invoice vs credit note)
        - Invoice/credit note number
        This runs BEFORE template matching for early vendor detection.

        Args:
            pdf_text: Extracted text from PDF
        Returns:
            Dict with cvr, document_type, document_number, vendor_id, vendor_name, is_own_invoice
        """
        from app.core.config import settings
        logger.info("⚡ Running quick analysis on upload...")
        result = {
            "cvr": None,
            "document_type": None,  # 'invoice' or 'credit_note'
            "document_number": None,
            "vendor_id": None,
            "vendor_name": None,
            "is_own_invoice": False  # True if this is an outgoing invoice (BMC's own CVR)
        }
        # 1. FIND CVR NUMBER (8 digits)
        # Supplier invoices carry BOTH the buyer's CVR (our own) and the
        # seller's CVR; the seller's is the one we want.
        cvr_patterns = (
            r'CVR[:\-\s]*(\d{8})',
            r'CVR[:\-\s]*nr\.?\s*(\d{8})',
            r'CVR[:\-\s]*nummer\s*(\d{8})',
            r'SE[:\-\s]*(\d{8})',  # SE = Swedish CVR, but also used in DK
            r'\b(\d{8})\b'  # Fallback: any 8-digit number
        )
        # Collect every distinct candidate, labelled patterns first.
        found_cvrs = []
        for pattern in cvr_patterns:
            for cvr_candidate in re.findall(pattern, pdf_text, re.IGNORECASE):
                # Only numbers starting with 1-4 are accepted as CVR numbers.
                if not cvr_candidate.startswith(('1', '2', '3', '4')):
                    continue
                if cvr_candidate in found_cvrs:
                    continue
                found_cvrs.append(cvr_candidate)
        # Everything except our own CVR is a potential seller CVR.
        vendor_cvrs = [cvr for cvr in found_cvrs if cvr != settings.OWN_CVR]
        own_cvr_seen = settings.OWN_CVR in found_cvrs
        if vendor_cvrs:
            # Use the first non-own CVR and try to match it to a vendor.
            result['cvr'] = vendor_cvrs[0]
            if own_cvr_seen:
                logger.info(f"📋 Found vendor CVR: {vendor_cvrs[0]} (ignored BMC CVR: {settings.OWN_CVR})")
            else:
                logger.info(f"📋 Found CVR: {vendor_cvrs[0]}")
            vendor = self.match_vendor_by_cvr(vendor_cvrs[0])
            if vendor:
                result['vendor_id'] = vendor['id']
                result['vendor_name'] = vendor['name']
        elif own_cvr_seen:
            # Only our own CVR found = this is an outgoing invoice
            result['is_own_invoice'] = True
            result['cvr'] = settings.OWN_CVR
            logger.warning(f"⚠️ OUTGOING INVOICE: Only BMC CVR found")
        # 2. DETECT DOCUMENT TYPE (Invoice vs Credit Note)
        credit_keywords = [
            'kreditnota', 'credit note', 'creditnote', 'kreditfaktura',
            'refusion', 'tilbagebetaling', 'godtgørelse', 'tilbageførsel'
        ]
        text_lower = pdf_text.lower()
        if any(keyword in text_lower for keyword in credit_keywords):
            result['document_type'] = 'credit_note'
            logger.info("📄 Document type: CREDIT NOTE")
        else:
            result['document_type'] = 'invoice'
            logger.info("📄 Document type: INVOICE")
        # 3. EXTRACT DOCUMENT NUMBER
        # The label to look for depends on the detected document type.
        number_patterns_by_type = {
            'credit_note': [
                r'kreditnota\s*(?:nr\.?|nummer)[:\s]*(\S+)',
                r'credit\s*note\s*(?:no\.?|number)[:\s]*(\S+)',
                r'kreditfaktura\s*(?:nr\.?|nummer)[:\s]*(\S+)',
            ],
            'invoice': [
                r'faktura\s*(?:nr\.?|nummer)[:\s]*(\S+)',
                r'invoice\s*(?:no\.?|number)[:\s]*(\S+)',
                r'fakturanr\.?\s*[:\s]*(\S+)',
            ],
        }
        for number_pattern in number_patterns_by_type[result['document_type']]:
            number_match = re.search(number_pattern, pdf_text, re.IGNORECASE)
            if not number_match:
                continue
            result['document_number'] = number_match.group(1).strip()
            logger.info(f"🔢 Document number: {result['document_number']}")
            break
        logger.info(f"✅ Quick analysis complete: CVR={result['cvr']}, Type={result['document_type']}, Number={result['document_number']}, Vendor={result['vendor_name']}")
        return result
    def match_vendor_by_cvr(self, vendor_cvr: Optional[str]) -> Optional[Dict]:
        """
        Match vendor from database using CVR number
@ -459,7 +583,7 @@ Output: {
        # Search vendors table
        vendor = execute_query(
-            "SELECT * FROM vendors WHERE cvr = %s",
+            "SELECT * FROM vendors WHERE cvr_number = %s",
            (cvr_clean,),
            fetchone=True
        )
--- a/app/services/template_service.py
+++ b/app/services/template_service.py
@ -1,6 +1,6 @@
 """
 Supplier Invoice Template Service
-Simple template-based invoice field extraction (no AI)
+Hybrid approach: invoice2data templates + custom regex templates
 Inspired by OmniSync's invoice template system
 """
@ -11,6 +11,7 @@ from datetime import datetime
 from pathlib import Path
 from app.core.database import execute_query, execute_insert, execute_update
 from app.services.invoice2data_service import get_invoice2data_service
 logger = logging.getLogger(__name__)
@ -21,12 +22,19 @@ class TemplateService:
    def __init__(self):
        """Create the service with an empty cache; nothing is loaded yet."""
        # invoice2data service handle; None until _ensure_loaded sets it.
        self.invoice2data = None
        # Flipped to True by _ensure_loaded after the first load attempt.
        self._initialized = False
        self.templates_cache = {}
    def _ensure_loaded(self):
        """Load templates on first use; later calls return immediately."""
        if self._initialized:
            return
        logger.info("🔄 Lazy loading templates...")
        self._load_templates()
        # The invoice2data service is optional: if it cannot be created the
        # failure is logged and self.invoice2data keeps its current value.
        try:
            self.invoice2data = get_invoice2data_service()
            logger.info(f"✅ Invoice2Data service initialized")
        except Exception as e:
            logger.warning(f"⚠️ Failed to load invoice2data: {e}")
        self._initialized = True
    def _load_templates(self):
@ -51,11 +59,24 @@ class TemplateService:
    def match_template(self, pdf_text: str) -> Tuple[Optional[int], float]:
        """
        Find best matching template for PDF text
        First tries invoice2data templates, then falls back to custom templates
        Returns: (template_id, confidence_score)
        """
        self._ensure_loaded()  # Lazy load templates
-        logger.info(f"🔍 Matching against {len(self.templates_cache)} templates")
+        # Try invoice2data templates first
        if self.invoice2data:
            try:
                template_name = self.invoice2data.match_template(pdf_text)
                if template_name:
                    logger.info(f"✅ Matched invoice2data template: {template_name}")
                    # Return special ID to indicate invoice2data template
                    return (-1, 1.0)  # -1 = invoice2data, 100% confidence
            except Exception as e:
                logger.warning(f"⚠️ Invoice2data matching failed: {e}")
        # Fallback to custom templates
        logger.info(f"🔍 Matching against {len(self.templates_cache)} custom templates")
        best_match = None
        best_score = 0.0
@ -112,6 +133,19 @@ class TemplateService:
        """Extract invoice fields using template's regex patterns"""
        self._ensure_loaded()  # Lazy load templates
        # Check if this is an invoice2data template
        if template_id == -1:
            if self.invoice2data:
                try:
                    result = self.invoice2data.extract(pdf_text)
                    if result:
                        logger.info(f"✅ Extracted fields using invoice2data")
                        return result
                except Exception as e:
                    logger.error(f"❌ Invoice2data extraction failed: {e}")
            return {}
        # Use custom template
        template = self.templates_cache.get(template_id)
        if not template:
            logger.warning(f"⚠️ Template {template_id} not found in cache")
@ -128,11 +162,36 @@ class TemplateService:
                continue
            try:
-                match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE)
+                # Special handling for CVR to avoid extracting own CVR
-                if match and len(match.groups()) >= group:
+                if field_name == 'vendor_cvr':
-                    value = match.group(group).strip()
+                    from app.core.config import settings
-                    extracted[field_name] = value
+                    own_cvr = getattr(settings, 'OWN_CVR', '29522790')
-                    logger.debug(f"  ✓ {field_name}: {value}")
+                    
                    # Find ALL CVR matches
                    all_matches = list(re.finditer(pattern, pdf_text, re.IGNORECASE | re.MULTILINE))
                    found_cvrs = []
                    for match in all_matches:
                        if len(match.groups()) >= group:
                            cvr = match.group(group).strip()
                            found_cvrs.append(cvr)
                    # Filter out own CVR
                    vendor_cvrs = [cvr for cvr in found_cvrs if cvr != own_cvr]
                    if vendor_cvrs:
                        # Use first non-own CVR as vendor CVR
                        extracted[field_name] = vendor_cvrs[0]
                        logger.debug(f"  ✓ {field_name}: {vendor_cvrs[0]} (filtered out own CVR: {own_cvr})")
                    else:
                        logger.warning(f"  ⚠️ Only found own CVR ({own_cvr}), no vendor CVR found")
                else:
                    # Normal extraction for other fields
                    match = re.search(pattern, pdf_text, re.IGNORECASE | re.MULTILINE)
                    if match and len(match.groups()) >= group:
                        value = match.group(group).strip()
                        extracted[field_name] = value
                        logger.debug(f"  ✓ {field_name}: {value}")
            except Exception as e:
                logger.warning(f"  ✗ Failed to extract {field_name}: {e}")
--- a/migrations/011_extraction_lines_context.sql
+++ b/migrations/011_extraction_lines_context.sql
@ -0,0 +1,18 @@
 -- Migration 011: Add context fields to extraction_lines
 -- These fields capture additional context information from invoice line items
 -- Re-runnable: columns and index use IF NOT EXISTS, and COMMENT ON simply
 -- overwrites the previous comment.
 -- NOTE(review): migrations/011_quick_analysis.sql uses the same number 011;
 -- confirm the migration runner handles duplicate numbers as intended.
 ALTER TABLE extraction_lines
 ADD COLUMN IF NOT EXISTS ip_address VARCHAR(50),
 ADD COLUMN IF NOT EXISTS contract_number VARCHAR(100),
 ADD COLUMN IF NOT EXISTS location_street VARCHAR(255),
 ADD COLUMN IF NOT EXISTS location_zip VARCHAR(10),
 ADD COLUMN IF NOT EXISTS location_city VARCHAR(100);
 -- Add index for contract number lookups
 CREATE INDEX IF NOT EXISTS idx_extraction_lines_contract_number ON extraction_lines(contract_number);
 -- Column documentation stored in the database catalog
 COMMENT ON COLUMN extraction_lines.ip_address IS 'IP address/subnet from line context (e.g., 152.115.56.192/27)';
 COMMENT ON COLUMN extraction_lines.contract_number IS 'Contract number from line context (e.g., NKA-008225)';
 COMMENT ON COLUMN extraction_lines.location_street IS 'Street address from line context';
 COMMENT ON COLUMN extraction_lines.location_zip IS 'Zip code from line context';
 COMMENT ON COLUMN extraction_lines.location_city IS 'City from line context';
--- a/migrations/011_quick_analysis.sql
+++ b/migrations/011_quick_analysis.sql
@ -0,0 +1,19 @@
 -- Migration 011: Quick Analysis on Upload
 -- Adds fields to store automatic CVR, document type, and document number detection
 -- Re-runnable: columns and indexes use IF NOT EXISTS.
 -- NOTE(review): migrations/011_extraction_lines_context.sql uses the same
 -- number 011; confirm the migration runner handles duplicate numbers as intended.
 -- Add quick analysis fields to incoming_files
 -- detected_cvr is exactly wide enough for an 8-digit CVR number;
 -- detected_vendor_id must reference an existing vendors.id (or be NULL).
 ALTER TABLE incoming_files 
 ADD COLUMN IF NOT EXISTS detected_cvr VARCHAR(8),
 ADD COLUMN IF NOT EXISTS detected_vendor_id INTEGER REFERENCES vendors(id),
 ADD COLUMN IF NOT EXISTS detected_document_type VARCHAR(20),  -- 'invoice' or 'credit_note'
 ADD COLUMN IF NOT EXISTS detected_document_number VARCHAR(100);
 -- Add index for CVR lookups
 CREATE INDEX IF NOT EXISTS idx_incoming_files_detected_cvr ON incoming_files(detected_cvr);
 -- Add index for lookups by matched vendor
 CREATE INDEX IF NOT EXISTS idx_incoming_files_detected_vendor ON incoming_files(detected_vendor_id);
 -- Add comments
 COMMENT ON COLUMN incoming_files.detected_cvr IS 'Automatically detected CVR number from PDF text';
 COMMENT ON COLUMN incoming_files.detected_vendor_id IS 'Vendor matched by CVR on upload';
 COMMENT ON COLUMN incoming_files.detected_document_type IS 'Auto-detected: invoice or credit_note';
 COMMENT ON COLUMN incoming_files.detected_document_number IS 'Automatically extracted invoice/credit note number';
--- a/migrations/012_own_invoice_filter.sql
+++ b/migrations/012_own_invoice_filter.sql
@ -0,0 +1,20 @@
 -- Migration 012: Add is_own_invoice flag to filter outgoing invoices
 -- BMC's own CVR: 29522790
 -- Requires the detected_cvr column added by 011_quick_analysis.sql.
 -- NOTE(review): migrations/012_template_default_category.sql uses the same
 -- number 012; confirm the migration runner handles duplicate numbers as intended.
 -- Add column to track outgoing invoices (BMC's own invoices to customers)
 ALTER TABLE incoming_files 
 ADD COLUMN IF NOT EXISTS is_own_invoice BOOLEAN DEFAULT FALSE;
 -- Mark existing files with BMC's CVR as outgoing invoices
 -- (backfill; the CVR literal is hard-coded here rather than read from configuration)
 UPDATE incoming_files 
 SET is_own_invoice = TRUE 
 WHERE detected_cvr = '29522790';
 -- Add index for faster filtering
 -- Partial index: only rows flagged TRUE are indexed.
 CREATE INDEX IF NOT EXISTS idx_incoming_files_is_own_invoice 
 ON incoming_files(is_own_invoice) 
 WHERE is_own_invoice = TRUE;
 -- Add comment
 COMMENT ON COLUMN incoming_files.is_own_invoice IS 
 'TRUE hvis filen er en udgående faktura fra BMC (CVR 29522790), FALSE hvis leverandør faktura';
--- a/migrations/012_template_default_category.sql
+++ b/migrations/012_template_default_category.sql
@ -0,0 +1,13 @@
 -- Migration 012: Add default product category to templates
 -- Allows templates to specify default category for line items (varesalg, drift, etc.)
 -- NOTE(review): this change set contains a second migration numbered 012
 -- (012_own_invoice_filter.sql); confirm the migration runner orders the two deterministically.
 -- default_product_category falls back to 'varesalg'; default_product_group_number has no default and is nullable.
 ALTER TABLE supplier_invoice_templates
 ADD COLUMN IF NOT EXISTS default_product_category VARCHAR(50) DEFAULT 'varesalg',
 ADD COLUMN IF NOT EXISTS default_product_group_number INTEGER;
 -- Valid categories: varesalg, drift, anlæg, abonnement, lager, udlejning
 -- (Danish; roughly: goods sales, operations, fixed assets, subscription, inventory, rental.)
 -- The list is documentation only: no CHECK constraint restricts the column to these values.
 COMMENT ON COLUMN supplier_invoice_templates.default_product_category IS 'Default kategori for varelinjer: varesalg, drift, anlæg, abonnement, lager, udlejning';
 COMMENT ON COLUMN supplier_invoice_templates.default_product_group_number IS 'Default e-conomic produktgruppe nummer';
 -- Add index for category lookups
 CREATE INDEX IF NOT EXISTS idx_supplier_invoice_templates_category ON supplier_invoice_templates(default_product_category);
--- a/requirements.txt
+++ b/requirements.txt
@ -15,3 +15,5 @@ PyPDF2==3.0.1
 pdfplumber==0.11.4
 pytesseract==0.3.13
 Pillow==11.0.0
 invoice2data==0.4.4
 pyyaml==6.0.2
--- a/scripts/backfill_quick_analysis.py
+++ b/scripts/backfill_quick_analysis.py
@ -0,0 +1,89 @@
 #!/usr/bin/env python3
 """
 Backfill quick analysis for existing files
 """
 import sys
 import asyncio
 from pathlib import Path
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 from app.core.database import execute_query, execute_update, init_db
 from app.services.ollama_service import ollama_service
 async def backfill_quick_analysis() -> None:
    """Run quick analysis on stored files that are missing detected fields.

    Selects ``incoming_files`` rows where ``detected_cvr`` or
    ``detected_document_number`` is NULL (skipping rows with status
    'duplicate' and rows without a ``file_path``), newest ``file_id`` first.
    For every file that still exists on disk the text is extracted, passed
    to ``ollama_service.quick_analysis_on_upload`` and the result is stored.

    Values are written through ``COALESCE(new, existing)``: a row is selected
    when *either* field is missing, so a plain assignment would blank out
    fields that were already detected whenever the re-analysis finds nothing
    (for example when the extracted text comes back empty).

    Per-file problems (missing file, any exception while processing) are
    printed and counted as failures without stopping the run. An error
    outside the per-file loop is printed and re-raised.
    """
    # Initialize database
    init_db()
    try:
        # Get files without quick analysis
        files = execute_query(
            """SELECT file_id, filename, file_path
               FROM incoming_files
               WHERE (detected_cvr IS NULL OR detected_document_number IS NULL)
               AND status NOT IN ('duplicate')
               AND file_path IS NOT NULL
               ORDER BY file_id DESC"""
        )
        print(f"📋 Found {len(files)} files without quick analysis")
        success_count = 0
        fail_count = 0
        for file in files:
            try:
                file_path = Path(file['file_path'])
                if not file_path.exists():
                    print(f"⚠️ File not found: {file_path}")
                    fail_count += 1
                    continue
                print(f"\n🔍 Processing: {file['filename']} (ID: {file['file_id']})")
                # Extract text
                text = await ollama_service._extract_text_from_file(file_path)
                # Run quick analysis
                quick_result = await ollama_service.quick_analysis_on_upload(text)
                # Update database. COALESCE keeps the stored value whenever the
                # new analysis returned None for that field.
                # NOTE(review): is_own_invoice is not updated here; migration 012
                # flags rows by detected_cvr only once, at migration time --
                # confirm whether backfilled rows need the flag as well.
                execute_update(
                    """UPDATE incoming_files
                       SET detected_cvr = COALESCE(%s, detected_cvr),
                           detected_vendor_id = COALESCE(%s, detected_vendor_id),
                           detected_document_type = COALESCE(%s, detected_document_type),
                           detected_document_number = COALESCE(%s, detected_document_number)
                       WHERE file_id = %s""",
                    (quick_result.get('cvr'),
                     quick_result.get('vendor_id'),
                     quick_result.get('document_type'),
                     quick_result.get('document_number'),
                     file['file_id'])
                )
                print(f"✅ Updated: CVR={quick_result.get('cvr')}, "
                      f"Type={quick_result.get('document_type')}, "
                      f"Number={quick_result.get('document_number')}, "
                      f"Vendor={quick_result.get('vendor_name')}")
                success_count += 1
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole backfill.
                print(f"❌ Error processing {file['filename']}: {e}")
                fail_count += 1
        print(f"\n📊 Summary: {success_count} successful, {fail_count} failed")
    except Exception as e:
        print(f"❌ Fatal error: {e}")
        raise
 # Script entry point: run the backfill coroutine to completion when this
 # file is executed directly (nothing runs on import).
 if __name__ == "__main__":
    asyncio.run(backfill_quick_analysis())
--- a/static/design_templates/09_horizontal_dark/index.html
+++ b/static/design_templates/09_horizontal_dark/index.html
@ -1,4 +1,4 @@
-<!DOCTYPE html>
+ <!DOCTYPE html>
 <html lang="en" data-bs-theme="dark">
 <head>
    <meta charset="UTF-8">
--- a/test_quick_analysis.py
+++ b/test_quick_analysis.py
@ -0,0 +1,86 @@
 #!/usr/bin/env python3
 """
 Test Quick Analysis on Upload
 Tests CVR detection, document type, and invoice number extraction
 """
 import asyncio
 import sys
 from pathlib import Path
 # Add the repository root (this file's directory) to the import path so the
 # ``app`` package resolves. Inserting the ``app`` directory itself does not
 # make ``app.services`` importable and would expose its sub-packages as
 # top-level modules.
 sys.path.insert(0, str(Path(__file__).parent))
 from app.services.ollama_service import ollama_service
 async def test_quick_analysis():
    """Check quick analysis against one invoice and one credit note sample.

    Each sample text is passed to ``ollama_service.quick_analysis_on_upload``.
    The returned fields are printed, then the detected CVR, document type
    and document number are asserted against the values written in the
    sample. Vendor id and name are printed only, not asserted.
    """
    # Invoice sample, including a CVR line
    invoice_text = """
    ALSO Danmark A/S
    Jupitervej 4
    6000 Kolding
    CVR-nr.: 35812428
    FAKTURA
    Faktura nr.: INV-2024-12345
    Dato: 2024-12-08
    Beløb i alt: 5.965,18 DKK
    """
    # Credit note sample that also mentions the original invoice number
    credit_note_text = """
    Test Leverandør A/S
    CVR: 12345678
    KREDITNOTA
    Kreditnota nr.: CN-2024-5678
    Original faktura: INV-2024-1000
    Beløb: -1.234,56 DKK
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60
    # (test number, title, input text, expected cvr, expected type, expected number)
    scenarios = (
        (1, "Invoice with CVR", invoice_text, "35812428", "invoice", "INV-2024-12345"),
        (2, "Credit Note", credit_note_text, "12345678", "credit_note", "CN-2024-5678"),
    )
    # Label/key pairs in the order the fields are reported
    reported_fields = (
        ("CVR", "cvr"),
        ("Document Type", "document_type"),
        ("Document Number", "document_number"),
        ("Vendor ID", "vendor_id"),
        ("Vendor Name", "vendor_name"),
    )
    print("🧪 Testing Quick Analysis\n")
    print(heavy_rule)
    for number, title, text, want_cvr, want_type, want_number in scenarios:
        print(f"\n📄 TEST {number}: {title}")
        print(light_rule)
        outcome = await ollama_service.quick_analysis_on_upload(text)
        for label, key in reported_fields:
            print(f"{label}: {outcome[key]}")
        assert outcome['cvr'] == want_cvr, f"Expected CVR {want_cvr}, got {outcome['cvr']}"
        assert outcome['document_type'] == want_type, f"Expected {want_type}, got {outcome['document_type']}"
        assert outcome['document_number'] == want_number, f"Expected {want_number}, got {outcome['document_number']}"
        print(f"✅ Test {number} PASSED")
    print("\n" + heavy_rule)
    print("✅ ALL TESTS PASSED!")
    print(heavy_rule)
 # Allow running this check directly as a script: drives the coroutine to
 # completion with asyncio.run.
 if __name__ == "__main__":
    asyncio.run(test_quick_analysis())