diff --git a/app/billing/backend/supplier_invoices.py b/app/billing/backend/supplier_invoices.py index 17ccbb6..a7d0ed1 100644 --- a/app/billing/backend/supplier_invoices.py +++ b/app/billing/backend/supplier_invoices.py @@ -23,14 +23,8 @@ router = APIRouter() def _smart_extract_lines(text: str) -> List[Dict]: """ - Multi-line extraction for ALSO invoices. - - Format: - 100 48023976 REFURB LENOVO ThinkPad P15 G1 Grde A - ...metadata lines... - 1ST 3.708,27 3.708,27 - - Combines data from description line + price line. + Universal line extraction using pdfplumber layout mode. + Tries pdfplumber columnar format first, then falls back to vendor-specific patterns. """ lines_arr = text.split('\n') items = [] @@ -44,7 +38,26 @@ def _smart_extract_lines(text: str) -> List[Dict]: i += 1 continue - # Pattern 1: ALSO format - "100 48023976 REFURB LENOVO..." + # Pattern 1: pdfplumber layout mode - " 1 95006 Betalingsmetode... 1 41,20 41,20" + # Whitespace-separated columns: position item_number description quantity unit_price total_price + # Most specific pattern - try first! + layout_match = re.match(r'^\s*(\d{1,2})\s+(\d{4,10})\s+(.+?)\s(\d{1,2})\s+([\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$', line) + if layout_match: + items.append({ + 'line_number': len(items) + 1, + 'position': layout_match.group(1), + 'item_number': layout_match.group(2), + 'description': layout_match.group(3).strip(), + 'quantity': layout_match.group(4), + 'unit_price': layout_match.group(5).replace(' ', '').replace(',', '.'), + 'total_price': layout_match.group(6).replace(' ', '').replace(',', '.'), + 'raw_text': line + }) + logger.info(f"✅ pdfplumber layout: {layout_match.group(2)} - {layout_match.group(3)[:30]}...") + i += 1 + continue + + # Pattern 2: ALSO format - "100 48023976 REFURB LENOVO..." 
(multi-line) item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line) if item_match: position = item_match.group(1) @@ -80,8 +93,11 @@ def _smart_extract_lines(text: str) -> List[Dict]: i += 1 continue - # Pattern 2: DCS format - "195006Betalingsmetode... 141,2041,20" - dcs_match = re.match(r'^(\d{1,2})(\d{4,6})([^0-9]+?)\s+(\d+)([\d,]+)([\d,]+)$', line) + # Pattern 3: DCS single-line - "195006Betalingsmetode... 141,2041,20" (legacy PyPDF2 format) + # Position: 1 digit, Item: 4-10 digits, Description starts with letter + # Prices: Danish format 1-3 digits, comma, 2 decimals (e.g., 41,20 or 619,00) + # Quantity: 1-2 digits (non-greedy) before first price + dcs_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+?)(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})$', line) if dcs_match: items.append({ 'line_number': len(items) + 1, @@ -93,7 +109,40 @@ def _smart_extract_lines(text: str) -> List[Dict]: 'total_price': dcs_match.group(6).replace(',', '.'), 'raw_text': line }) - logger.info(f"✅ DCS: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...") + logger.info(f"✅ DCS single-line: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...") + i += 1 + continue + + # Pattern 4: DCS multi-line - "2994922511Ubiquiti..." 
then search for "...USW-FLEX 1619,00619,00" (legacy) + dcs_multi_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+)$', line) + if dcs_multi_match and not re.search(r'KN8|EAN|Model|Position|Varenr|Tekst', line): + position = dcs_multi_match.group(1) + item_number = dcs_multi_match.group(2) + description = dcs_multi_match.group(3).strip() + + # Search next 5 lines for quantity/prices (Danish format 1-3 digits before comma) + for j in range(1, 6): + if i + j >= len(lines_arr): + break + price_line = lines_arr[i + j].strip() + # Match: "S/N: ...USW-FLEX 1619,00619,00" - qty (1-2 digits, non-greedy) + TWO prices + price_match = re.search(r'(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})\s*$', price_line) + if price_match: + quantity = price_match.group(1) + unit_price = price_match.group(2).replace(',', '.') + total_price = price_match.group(3).replace(',', '.') + items.append({ + 'line_number': len(items) + 1, + 'position': position, + 'item_number': item_number, + 'description': description, + 'quantity': quantity, + 'unit_price': unit_price, + 'total_price': total_price, + 'raw_text': f"{line} ... {price_line}" + }) + logger.info(f"✅ DCS multi-line: {item_number} - {description[:30]}...") + break i += 1 continue @@ -169,15 +218,41 @@ async def list_supplier_invoices( raise HTTPException(status_code=500, detail=str(e)) -@router.get("/supplier-invoices/pending-files") +@router.get("/pending-supplier-invoice-files") async def get_pending_files(): - """Hent liste over filer der venter på behandling""" + """Hent alle filer der venter på behandling, inkl. 
AI-extracted""" try: + # Hent både pending files OG ai_extracted files files = execute_query( - """SELECT file_id, filename, status, uploaded_at, error_message, template_id - FROM incoming_files - WHERE status IN ('pending', 'processing', 'failed') - ORDER BY uploaded_at DESC""" + """SELECT DISTINCT ON (f.file_id) + f.file_id, + f.filename, + f.status, + f.uploaded_at, + f.error_message, + f.template_id, + f.file_path, + -- Get vendor info from latest extraction + ext.vendor_name, + ext.vendor_cvr, + ext.vendor_matched_id, + v.name as matched_vendor_name, + -- Check if already has invoice via latest extraction only + si.id as existing_invoice_id, + si.invoice_number as existing_invoice_number + FROM incoming_files f + LEFT JOIN LATERAL ( + SELECT extraction_id, file_id, vendor_name, vendor_cvr, vendor_matched_id + FROM extractions + WHERE file_id = f.file_id + ORDER BY created_at DESC + LIMIT 1 + ) ext ON true + LEFT JOIN vendors v ON v.id = ext.vendor_matched_id + LEFT JOIN supplier_invoices si ON si.extraction_id = ext.extraction_id + WHERE f.status IN ('pending', 'processing', 'failed', 'ai_extracted', 'processed') + AND si.id IS NULL -- Only show files without invoice yet + ORDER BY f.file_id, f.uploaded_at DESC""" ) return {"files": files if files else [], "count": len(files) if files else 0} except Exception as e: @@ -185,6 +260,430 @@ async def get_pending_files(): raise HTTPException(status_code=500, detail=str(e)) +@router.get("/supplier-invoices/files/{file_id}/extracted-data") +async def get_file_extracted_data(file_id: int): + """Hent AI-extracted data fra en uploaded fil""" + try: + # Get file info + file_info = execute_query( + "SELECT * FROM incoming_files WHERE file_id = %s", + (file_id,), + fetchone=True + ) + + if not file_info: + raise HTTPException(status_code=404, detail="Fil ikke fundet") + + # Get extraction results if exists + extraction = execute_query( + "SELECT * FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1", + 
(file_id,), + fetchone=True + ) + + # Get extraction lines if exist + extraction_lines = [] + if extraction: + extraction_lines = execute_query( + """SELECT * FROM extraction_lines + WHERE extraction_id = %s + ORDER BY line_number""", + (extraction['extraction_id'],) + ) + + # Read PDF text if needed + pdf_text = None + if file_info['file_path']: + from pathlib import Path + file_path = Path(file_info['file_path']) + if file_path.exists(): + pdf_text = await ollama_service._extract_text_from_file(file_path) + + return { + "file_id": file_id, + "filename": file_info['filename'], + "status": file_info['status'], + "uploaded_at": file_info['uploaded_at'], + "extraction": extraction, + "extraction_lines": extraction_lines if extraction_lines else [], + "pdf_text_preview": pdf_text[:5000] if pdf_text else None + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to get extracted data: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/supplier-invoices/files/{file_id}/download") +async def download_pending_file(file_id: int): + """View PDF in browser""" + from fastapi.responses import FileResponse + from pathlib import Path + + try: + # Get file info + file_info = execute_query( + "SELECT * FROM incoming_files WHERE file_id = %s", + (file_id,), + fetchone=True + ) + + if not file_info: + raise HTTPException(status_code=404, detail="Fil ikke fundet") + + file_path = Path(file_info['file_path']) + if not file_path.exists(): + raise HTTPException(status_code=404, detail="Fil findes ikke på disk") + + # Return with inline disposition so browser displays it instead of downloading + return FileResponse( + path=str(file_path), + media_type='application/pdf', + headers={"Content-Disposition": f"inline; filename={file_info['filename']}"} + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to view file: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + 
@router.post("/supplier-invoices/files/{file_id}/link-vendor")
async def link_vendor_to_extraction(file_id: int, data: dict):
    """Link an existing vendor to the latest extraction of an uploaded file.

    This is the single handler for the link-vendor route. The route used to
    be declared twice in this module; only the first declaration was ever
    reachable, so its error messages are kept and the response is extended
    with the ``extraction_id`` that the unreachable copy returned.

    Args:
        file_id: ``incoming_files.file_id`` of the uploaded file.
        data: Request body; must contain a truthy ``vendor_id``.

    Returns:
        Dict with ``status``, ``vendor_id``, ``vendor_name`` and
        ``extraction_id``.

    Raises:
        HTTPException: 400 if ``vendor_id`` is missing, 404 if the vendor or
            an extraction for the file cannot be found, 500 on any other
            failure.
    """
    try:
        vendor_id = data.get('vendor_id')
        if not vendor_id:
            raise HTTPException(status_code=400, detail="vendor_id is required")

        # Verify vendor exists
        vendor = execute_query(
            "SELECT id, name FROM vendors WHERE id = %s",
            (vendor_id,),
            fetchone=True
        )
        if not vendor:
            raise HTTPException(status_code=404, detail="Leverandør ikke fundet")

        # Only the most recent extraction for the file is linked
        extraction = execute_query(
            "SELECT extraction_id FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,),
            fetchone=True
        )
        if not extraction:
            raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")

        extraction_id = extraction['extraction_id']

        # Update extraction with vendor match
        execute_update(
            """UPDATE extractions
               SET vendor_matched_id = %s
               WHERE extraction_id = %s""",
            (vendor_id, extraction_id)
        )

        logger.info(f"✅ Linked vendor {vendor['name']} (ID: {vendor_id}) to extraction for file {file_id}")

        return {
            "status": "success",
            "vendor_id": vendor_id,
            "vendor_name": vendor['name'],
            "extraction_id": extraction_id
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to link vendor: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete("/supplier-invoices/files/{file_id}")
async def delete_pending_file_endpoint(file_id: int):
    """Delete an uploaded file and its related data.

    The database row is removed first, then the file on disk. Deletion is
    refused once a supplier invoice has been created from any extraction of
    the file.

    Args:
        file_id: ``incoming_files.file_id`` of the file to delete.

    Returns:
        Dict with a confirmation ``message`` and the ``file_id``.

    Raises:
        HTTPException: 404 if the file record does not exist, 400 if an
            invoice was already created from it, 500 on any other failure.
    """
    from pathlib import Path

    try:
        # Get file info
        file_info = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # Check if already converted to invoice
        invoice_exists = execute_query(
            """SELECT si.id FROM supplier_invoices si
               JOIN extractions e ON si.extraction_id = e.extraction_id
               WHERE e.file_id = %s""",
            (file_id,),
            fetchone=True
        )
        if invoice_exists:
            raise HTTPException(
                status_code=400,
                detail="Kan ikke slette fil - der er allerede oprettet en faktura fra denne fil"
            )

        # NOTE(review): presumably extractions are removed by an
        # ON DELETE CASCADE constraint - confirm against the schema.
        execute_update(
            "DELETE FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )

        # Delete physical file. EAFP instead of exists()+remove(): the file
        # may disappear between the check and the removal.
        if file_info['file_path']:
            file_path = Path(file_info['file_path'])
            try:
                file_path.unlink()
            except FileNotFoundError:
                # Already gone from disk; nothing left to clean up.
                pass
            else:
                logger.info(f"🗑️ Deleted file: {file_path}")

        return {"message": "Fil slettet", "file_id": file_id}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete file: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.patch("/supplier-invoices/files/{file_id}")
async def update_file_status(file_id: int, data: dict):
    """Update the processing status of an uploaded file.

    Args:
        file_id: ``incoming_files.file_id`` of the file to update.
        data: Request body; ``status`` must be one of the allowed statuses.

    Returns:
        Dict with ``status``, ``file_id`` and ``new_status``.

    Raises:
        HTTPException: 400 if the status is missing or not allowed, 404 if
            the file record does not exist, 500 on any other failure.
    """
    try:
        allowed_statuses = ['pending', 'processing', 'processed', 'ai_extracted', 'completed', 'failed']
        new_status = data.get('status')

        if not new_status or new_status not in allowed_statuses:
            raise HTTPException(status_code=400, detail=f"Ugyldig status. Tilladte: {', '.join(allowed_statuses)}")

        # Without this check an unknown file_id would be reported as
        # successfully updated; the sibling file endpoints return 404 here.
        existing = execute_query(
            "SELECT file_id FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not existing:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # processed_at is refreshed on every status change, whatever the new status is
        execute_update(
            "UPDATE incoming_files SET status = %s, processed_at = CURRENT_TIMESTAMP WHERE file_id = %s",
            (new_status, file_id)
        )

        logger.info(f"✅ Updated file {file_id} status to {new_status}")

        return {"status": "success", "file_id": file_id, "new_status": new_status}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update file status: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


+@router.post("/supplier-invoices/from-extraction/{file_id}") +async def create_invoice_from_extraction(file_id: int): + """Opret leverandørfaktura fra extraction data""" + try: + # Get latest extraction for this file + extraction = execute_query( + """SELECT e.*, v.name as vendor_name + FROM extractions e + LEFT JOIN vendors v ON v.id = e.vendor_matched_id + WHERE e.file_id = %s + ORDER BY e.created_at DESC + LIMIT 1""", + (file_id,), + fetchone=True + ) + + if not extraction: + raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil") + + # Check if vendor is matched + if not extraction['vendor_matched_id']: + raise HTTPException( + status_code=400, + detail="Leverandør skal linkes før faktura kan oprettes. Brug 'Link eller Opret Leverandør' først." + ) + + # Check if invoice already exists + existing = execute_query( + "SELECT id FROM supplier_invoices WHERE extraction_id = %s", + (extraction['extraction_id'],), + fetchone=True + ) + + if existing: + raise HTTPException(status_code=400, detail="Faktura er allerede oprettet fra denne extraction") + + # Get extraction lines + lines = execute_query( + """SELECT * FROM extraction_lines + WHERE extraction_id = %s + ORDER BY line_number""", + (extraction['extraction_id'],) + ) + + # Parse LLM response JSON if it's a string + import json + llm_data = extraction.get('llm_response_json') + if isinstance(llm_data, str): + try: + llm_data = json.loads(llm_data) + except: + llm_data = {} + elif not llm_data: + llm_data = {} + + # Get invoice number and type from LLM data or generate one + invoice_number = llm_data.get('invoice_number') if llm_data else None + if not invoice_number: + invoice_number = f"INV-{file_id}" + + # Detect document type (invoice or credit_note) + document_type = llm_data.get('document_type', 'invoice') if llm_data else 'invoice' + invoice_type = 'credit_note' if document_type == 'credit_note' else 'invoice' + + # Get dates - use today as fallback if missing + from datetime 
import datetime, timedelta + invoice_date = extraction.get('document_date') + if not invoice_date: + invoice_date = datetime.now().strftime('%Y-%m-%d') + logger.warning(f"⚠️ No invoice_date found, using today: {invoice_date}") + + due_date = extraction.get('due_date') + if not due_date: + # Default to 30 days from invoice date + inv_date_obj = datetime.strptime(invoice_date, '%Y-%m-%d') + due_date = (inv_date_obj + timedelta(days=30)).strftime('%Y-%m-%d') + logger.warning(f"⚠️ No due_date found, using invoice_date + 30 days: {due_date}") + + # Create supplier invoice + invoice_id = execute_insert( + """INSERT INTO supplier_invoices ( + vendor_id, invoice_number, invoice_date, due_date, + total_amount, currency, status, extraction_id, notes, invoice_type + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + RETURNING id""", + ( + extraction['vendor_matched_id'], + invoice_number, + invoice_date, + due_date, + extraction['total_amount'], + extraction['currency'], + 'credited' if invoice_type == 'credit_note' else 'unpaid', + extraction['extraction_id'], + f"Oprettet fra AI extraction (file_id: {file_id})", + invoice_type + ) + ) + + # Create invoice lines + if lines: + for line in lines: + execute_update( + """INSERT INTO supplier_invoice_lines ( + supplier_invoice_id, description, quantity, unit_price, + line_total, vat_rate, vat_amount + ) VALUES (%s, %s, %s, %s, %s, %s, %s)""", + ( + invoice_id, + line['description'], + line.get('quantity') or 1, + line.get('unit_price') or 0, + line.get('line_total') or 0, + line.get('vat_rate') or 25.00, # Default 25% Danish VAT if NULL + line.get('vat_amount') + ) + ) + + # Update file status + execute_update( + "UPDATE incoming_files SET status = 'completed' WHERE file_id = %s", + (file_id,) + ) + + logger.info(f"✅ Created supplier invoice {invoice_id} from extraction {extraction['extraction_id']}") + + return { + "status": "success", + "invoice_id": invoice_id, + "invoice_number": invoice_number, + "vendor_name": 
extraction['vendor_name'], + "total_amount": extraction['total_amount'], + "currency": extraction['currency'] + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to create invoice from extraction: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +# Keep existing endpoints below... + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to delete file: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + # ========== TEMPLATE MANAGEMENT (must be before {invoice_id} route) ========== @router.get("/supplier-invoices/templates") @@ -206,6 +705,226 @@ async def list_templates(): raise HTTPException(status_code=500, detail=str(e)) +@router.post("/supplier-invoices/search-vendor") +async def search_vendor_by_info(request: Dict): + """ + Søg efter vendor baseret på navn, CVR, eller opret ny + + Request body: + { + "vendor_name": "DCS ApS", + "vendor_cvr": "12345678", + "vendor_address": "Vej 1, 2000 By", + "create_if_missing": true + } + """ + try: + vendor_name = request.get('vendor_name') + vendor_cvr = request.get('vendor_cvr') + vendor_address = request.get('vendor_address') + create_if_missing = request.get('create_if_missing', False) + + # Search by CVR first (most accurate) + if vendor_cvr: + vendor = execute_query( + "SELECT id, name, cvr_number FROM vendors WHERE cvr_number = %s", + (vendor_cvr,), + fetchone=True + ) + if vendor: + return { + "found": True, + "vendor_id": vendor['id'], + "vendor_name": vendor['name'], + "source": "cvr_match" + } + + # Search by name (fuzzy) + if vendor_name: + vendors = execute_query( + "SELECT id, name, cvr_number FROM vendors WHERE LOWER(name) LIKE LOWER(%s) LIMIT 5", + (f"%{vendor_name}%",) + ) + if vendors: + return { + "found": True, + "matches": vendors, + "source": "name_search", + "message": "Flere mulige matches - vælg en eller opret ny" + } + + # Create new vendor if requested + if create_if_missing and vendor_name: + from 
app.core.config import settings + + # Validate not creating vendor with own CVR + if vendor_cvr and settings.OWN_CVR in vendor_cvr: + raise HTTPException( + status_code=400, + detail=f"Kan ikke oprette vendor med eget CVR ({settings.OWN_CVR})" + ) + + new_vendor_id = execute_insert( + """INSERT INTO vendors (name, cvr_number, address, created_at) + VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""", + (vendor_name, vendor_cvr, vendor_address) + ) + + logger.info(f"✅ Created new vendor: {vendor_name} (ID: {new_vendor_id})") + + return { + "found": False, + "created": True, + "vendor_id": new_vendor_id, + "vendor_name": vendor_name, + "source": "newly_created" + } + + return { + "found": False, + "message": "Ingen vendor fundet - angiv create_if_missing=true for at oprette" + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Vendor search failed: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/supplier-invoices/ai/analyze") +async def ai_analyze_invoice(request: Dict): + """Brug AI til at analysere faktura og foreslå template felter""" + try: + pdf_text = request.get('pdf_text', '') + vendor_id = request.get('vendor_id') + + if not pdf_text: + raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet") + + # Build enhanced PDF text with instruction + from app.core.config import settings + + enhanced_text = f"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering. + +RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON! + +REQUIRED STRUKTUR (alle felter skal med): +{{ + "invoice_number": "5082481", + "invoice_date": "24/10-25", + "total_amount": "1471.20", + "cvr": "29522790", + "detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"], + "lines_start": "Nr.VarenrTekst", + "lines_end": "Subtotal" +}} + +FIND FØLGENDE: +1. invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice") +2. 
invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY) +3. total_amount: Total beløb + - Søg efter "Total", "I alt", "Totalbeløb" + - Hvis beløbet er på næste linje, match sidste tal + - Format: [\d.,]+ (f.eks. 1.471,20 eller 1471.20) +4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK") + - IGNORER CVR {settings.OWN_CVR} - dette er KØBERS CVR, ikke leverandør! + - Find LEVERANDØRENS CVR (normalt i toppen/header) +5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren + - Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S") + - Website eller email (f.eks. "WWW.DCS.DK") + - Adresse element (f.eks. "Høgemosevænget", "Mårkærvej") + - UNDGÅ generiske ord som "Faktura", "Danmark", "Side" +6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr") +7. lines_end: Tekst EFTER varelinjer (f.eks. "Subtotal", "I alt", "Side 1 af") + +VIGTIGT: +- detection_patterns SKAL være mindst 3 specifikke tekststrenge +- Vælg tekststrenge der er UNIKKE for denne leverandør +- CVR SKAL være leverandørens - IKKE {settings.OWN_CVR} (det er køber) +- LAD VÆRE med at lave patterns eller line_item regex - kun udtræk rå data + +PDF TEKST: +{pdf_text[:2000]} + +RETURNER KUN JSON - intet andet!""" + + # Call Ollama + logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst") + result = await ollama_service.extract_from_text(enhanced_text) + + if not result: + raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen") + + logger.info(f"✅ AI analyse gennemført: {result}") + return result + + except Exception as e: + logger.error(f"❌ AI analyse fejlede: {e}") + raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}") + + +@router.post("/supplier-invoices/templates") +async def create_template(request: Dict): + """ + Opret ny template + + Request body: + { + "vendor_id": 1, + "template_name": "Test Template", + "detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}], + 
"field_mappings": {"invoice_number": {"pattern": r"Nummer\s*(\d+)", "group": 1}} + } + """ + try: + import json + from app.core.config import settings + + vendor_id = request.get('vendor_id') + template_name = request.get('template_name') + detection_patterns = request.get('detection_patterns', []) + field_mappings = request.get('field_mappings', {}) + + if not vendor_id or not template_name: + raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet") + + # Validate that vendor CVR is not own company + vendor_cvr_mapping = field_mappings.get('vendor_cvr', {}) + if vendor_cvr_mapping: + # Extract CVR value from pattern or value field + cvr_value = vendor_cvr_mapping.get('value') or vendor_cvr_mapping.get('pattern', '') + if settings.OWN_CVR in str(cvr_value): + raise HTTPException( + status_code=400, + detail=f"CVR {cvr_value} matcher egen virksomhed ({settings.OWN_CVR}). Brug leverandørens CVR, ikke købers!" + ) + + # Insert template and get template_id + query = """ + INSERT INTO supplier_invoice_templates + (vendor_id, template_name, detection_patterns, field_mappings) + VALUES (%s, %s, %s, %s) + RETURNING template_id + """ + result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings))) + template_id = result[0]['template_id'] if result else None + + if not template_id: + raise HTTPException(status_code=500, detail="Kunne ikke oprette template") + + # Reload templates in cache + template_service.reload_templates() + + logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}") + return {"template_id": template_id, "message": "Template oprettet"} + except HTTPException: + raise + except Exception as e: + logger.error(f"❌ Failed to create template: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + @router.get("/supplier-invoices/{invoice_id}") async def get_supplier_invoice(invoice_id: int): """Get single supplier 
invoice with lines""" @@ -271,13 +990,18 @@ async def create_supplier_invoice(data: Dict): if not due_date: due_date = (invoice_date + timedelta(days=30)).strftime('%Y-%m-%d') + # Determine invoice type (default to invoice) + invoice_type = data.get('invoice_type', 'invoice') + if invoice_type not in ['invoice', 'credit_note']: + invoice_type = 'invoice' + # Insert supplier invoice invoice_id = execute_insert( """INSERT INTO supplier_invoices (invoice_number, vendor_id, vendor_name, invoice_date, due_date, total_amount, vat_amount, net_amount, currency, description, notes, - status, created_by) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'pending', %s)""", + status, created_by, invoice_type) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", ( data['invoice_number'], data['vendor_id'], @@ -290,7 +1014,9 @@ async def create_supplier_invoice(data: Dict): data.get('currency', 'DKK'), data.get('description'), data.get('notes'), - data.get('created_by') + 'credited' if invoice_type == 'credit_note' else 'pending', + data.get('created_by'), + invoice_type ) ) @@ -744,6 +1470,8 @@ async def upload_supplier_invoice(file: UploadFile = File(...)): "pdf_text": str # For manual review } """ + from app.core.config import settings + try: # Validate file extension suffix = Path(file.filename).suffix.lower() @@ -850,6 +1578,15 @@ async def upload_supplier_invoice(file: UploadFile = File(...)): if template: vendor_id = template.get('vendor_id') + # Save extraction to database + import json + extraction_id = execute_insert( + """INSERT INTO extractions + (file_id, template_id, extraction_method, raw_data, extracted_at) + VALUES (%s, %s, %s, %s, CURRENT_TIMESTAMP)""", + (file_id, template_id, 'template', json.dumps(extracted_fields)) + ) + # Log usage template_service.log_usage(template_id, file_id, True, confidence, extracted_fields) @@ -861,13 +1598,86 @@ async def upload_supplier_invoice(file: UploadFile = File(...)): (template_id, file_id) ) else: - 
logger.info("ℹ️ No template matched - manual entry required") - execute_update( - """UPDATE incoming_files - SET status = 'pending', processed_at = CURRENT_TIMESTAMP - WHERE file_id = %s""", - (file_id,) - ) + # FALLBACK: Use AI to extract data universally + logger.info("🤖 No template matched - using AI universal extraction...") + + try: + # Build AI prompt for universal extraction + ai_prompt = f"""OPGAVE: Analyser denne danske faktura og udtræk nøgledata. + +RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON! + +REQUIRED STRUKTUR: +{{ + "invoice_number": "5082481", + "invoice_date": "2025-10-24", + "due_date": "2025-11-24", + "total_amount": "1471.20", + "currency": "DKK", + "vendor_name": "DCS ApS", + "vendor_cvr": "29522790", + "vendor_address": "Høgemosevænget 89, 2820 Gentofte", + "line_items": [ + {{"description": "Ubiquiti Switch", "quantity": 1, "unit_price": "619.00", "total": "619.00"}} + ] +}} + +VIGTIGT: +- Dato format: YYYY-MM-DD +- Ignorer CVR {settings.OWN_CVR} (det er KØBERS CVR - find LEVERANDØRENS CVR) +- currency: Normalt "DKK" for danske fakturaer +- line_items: Udtræk så mange linjer som muligt +- Hvis et felt ikke kan findes, brug null + +PDF TEKST: +{text[:3000]} + +RETURNER KUN JSON!""" + + # Call AI + ai_result = await ollama_service.extract_from_text(ai_prompt) + + if ai_result and ai_result.get('vendor_cvr'): + # Try to find existing vendor by CVR + vendor = execute_query( + "SELECT id, name FROM vendors WHERE cvr_number = %s", + (ai_result['vendor_cvr'],), + fetchone=True + ) + + if vendor: + vendor_id = vendor['id'] + logger.info(f"✅ AI matched vendor: {vendor['name']} (CVR: {ai_result['vendor_cvr']})") + else: + logger.info(f"ℹ️ AI found unknown vendor CVR: {ai_result['vendor_cvr']}") + + extracted_fields = ai_result + + # Save extraction to database + import json + extraction_id = execute_insert( + """INSERT INTO extractions + (file_id, extraction_method, raw_data, extracted_at) + VALUES (%s, %s, %s, 
CURRENT_TIMESTAMP)""", + (file_id, 'ai_universal', json.dumps(ai_result)) + ) + + execute_update( + """UPDATE incoming_files + SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (file_id,) + ) + + except Exception as ai_error: + logger.warning(f"⚠️ AI extraction failed: {ai_error} - manual entry required") + execute_update( + """UPDATE incoming_files + SET status = 'pending', processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (file_id,) + ) + # Return data for user to review and confirm return { @@ -911,6 +1721,9 @@ async def reprocess_uploaded_file(file_id: int): Genbehandl en uploadet fil med template matching Bruges til at behandle filer der fejlede eller ikke blev færdigbehandlet """ + import json + from datetime import datetime, timedelta + try: # Get file record file_record = execute_query( @@ -954,13 +1767,135 @@ async def reprocess_uploaded_file(file_id: int): (template_id, file_id) ) else: - logger.info("ℹ️ Ingen template match") + logger.info("🤖 Ingen template match - bruger AI udtrækning med forbedret system prompt") + + # Use improved Ollama service with credit note detection + ai_result = await ollama_service.extract_from_text(text) + + if not ai_result or 'error' in ai_result: + execute_update( + """UPDATE incoming_files + SET status = 'failed', error_message = 'AI udtrækning returnerede ingen data', + processed_at = CURRENT_TIMESTAMP + WHERE file_id = %s""", + (file_id,) + ) + return { + "status": "failed", + "file_id": file_id, + "error": "AI udtrækning fejlede" + } + + # Search for vendor by CVR (normalize: remove DK prefix) + vendor_cvr = ai_result.get('vendor_cvr', '').replace('DK', '').replace('dk', '').strip() + vendor_id = None + + # CRITICAL: If AI mistakenly identified our own company as vendor, reject it + if vendor_cvr == settings.OWN_CVR: + logger.warning(f"⚠️ AI wrongly identified BMC Denmark (CVR {settings.OWN_CVR}) as vendor - this is the customer!") + vendor_cvr = None + 
ai_result['vendor_cvr'] = None + ai_result['vendor_name'] = None + + if vendor_cvr: + vendor = execute_query( + "SELECT id, name FROM vendors WHERE cvr_number = %s", + (vendor_cvr,), + fetchone=True + ) + if vendor: + vendor_id = vendor['id'] + logger.info(f"✅ Matched vendor: {vendor['name']} (CVR: {vendor_cvr})") + else: + logger.warning(f"⚠️ Vendor not found for CVR: {vendor_cvr}") + + # Extract dates from raw text if AI didn't provide them + invoice_date = ai_result.get('invoice_date') + due_date = ai_result.get('due_date') + + # Validate and clean dates + if invoice_date == '': + invoice_date = None + if due_date == '' or not due_date: + # If no due date, default to 30 days after invoice date + if invoice_date: + from datetime import datetime, timedelta + try: + inv_date_obj = datetime.strptime(invoice_date, '%Y-%m-%d') + due_date_obj = inv_date_obj + timedelta(days=30) + due_date = due_date_obj.strftime('%Y-%m-%d') + logger.info(f"📅 Calculated due_date: {due_date} (invoice_date + 30 days)") + except: + due_date = None + else: + due_date = None + + if not invoice_date and 'raw_text_snippet' in ai_result: + # Try to find date in format "Dato: DD.MM.YYYY" + import re + from datetime import datetime + date_match = re.search(r'Dato:\s*(\d{2})\.(\d{2})\.(\d{4})', ai_result['raw_text_snippet']) + if date_match: + day, month, year = date_match.groups() + invoice_date = f"{year}-{month}-{day}" + logger.info(f"📅 Extracted invoice_date from text: {invoice_date}") + + # Normalize line items (AI might return 'lines' or 'line_items') + line_items = ai_result.get('line_items') or ai_result.get('lines') or [] + + # Use matched vendor name if found, otherwise use AI's name + vendor_name = ai_result.get('vendor_name') + if vendor_id and vendor: + vendor_name = vendor['name'] # Override with actual vendor name from database + logger.info(f"✅ Using matched vendor name: {vendor_name}") + + # Save extraction to database with document_type_detected + document_type = 
ai_result.get('document_type', 'invoice') + extraction_id = execute_insert( + """INSERT INTO extractions ( + file_id, vendor_matched_id, llm_response_json, + vendor_name, vendor_cvr, document_date, due_date, + total_amount, currency, confidence, status, document_type_detected + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'extracted', %s) + RETURNING extraction_id""", + ( + file_id, vendor_id, json.dumps(ai_result), + vendor_name, vendor_cvr, # Use corrected vendor name + invoice_date, due_date, # Use extracted dates + ai_result.get('total_amount'), ai_result.get('currency', 'DKK'), + ai_result.get('confidence', 0.8), + document_type # Store detected document type (invoice or credit_note) + ) + ) + + # Save line items (handle both 'lines' and 'line_items') + if line_items: + for idx, line in enumerate(line_items, 1): + execute_update( + """INSERT INTO extraction_lines ( + extraction_id, line_number, description, quantity, + unit_price, line_total, vat_rate, vat_amount, confidence + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)""", + ( + extraction_id, idx, + line.get('description'), + line.get('quantity'), + line.get('unit_price'), + line.get('total_price') or line.get('line_total'), + line.get('vat_rate'), + line.get('vat_amount'), + line.get('confidence', 0.8) + ) + ) + execute_update( """UPDATE incoming_files - SET status = 'pending', processed_at = CURRENT_TIMESTAMP + SET status = 'ai_extracted', processed_at = CURRENT_TIMESTAMP, error_message = NULL WHERE file_id = %s""", (file_id,) ) + + extracted_fields = ai_result return { "status": "success", @@ -969,136 +1904,17 @@ async def reprocess_uploaded_file(file_id: int): "template_matched": template_id is not None, "template_id": template_id, "vendor_id": vendor_id, - "confidence": confidence, + "confidence": confidence if template_id else 0.8, "extracted_fields": extracted_fields, - "pdf_text": text # Return full text for template builder + "pdf_text": text[:1000] if not template_id else text } except 
HTTPException: raise except Exception as e: - logger.error(f"❌ Reprocess failed for file {file_id}: {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Genbehandling fejlede: {str(e)}") -# ========== TEMPLATE MANAGEMENT ========== - -@router.post("/supplier-invoices/ai-analyze") -async def ai_analyze_invoice(request: Dict): - """Brug AI til at analysere faktura og foreslå template felter""" - try: - pdf_text = request.get('pdf_text', '') - vendor_id = request.get('vendor_id') - - if not pdf_text: - raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet") - - # Build enhanced PDF text with instruction - enhanced_text = f"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering. - -RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON! - -REQUIRED STRUKTUR (alle felter skal med): -{{ - "invoice_number": "5082481", - "invoice_date": "24/10-25", - "total_amount": "1471.20", - "cvr": "29522790", - "detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"], - "lines_start": "Nr.VarenrTekst", - "lines_end": "Subtotal" -}} - -FIND FØLGENDE: -1. invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice") -2. invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY) -3. total_amount: Total beløb - - Søg efter "Total", "I alt", "Totalbeløb" - - Hvis beløbet er på næste linje, match sidste tal - - Format: [\d.,]+ (f.eks. 1.471,20 eller 1471.20) -4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK") -5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren - - Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S") - - Website eller email (f.eks. "WWW.DCS.DK") - - Adresse element (f.eks. "Høgemosevænget", "Mårkærvej") - - UNDGÅ generiske ord som "Faktura", "Danmark", "Side" -6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr") -7. lines_end: Tekst EFTER varelinjer (f.eks. 
"Subtotal", "I alt", "Side 1 af") - -VIGTIGT: -- detection_patterns SKAL være mindst 3 specifikke tekststrenge -- Vælg tekststrenge der er UNIKKE for denne leverandør -- LAV IKKE patterns eller line_item - kun udtræk data - -PDF TEKST: -{pdf_text[:2000]} - -RETURNER KUN JSON - intet andet!""" - - # Call Ollama - logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst") - result = await ollama_service.extract_from_text(enhanced_text) - - if not result: - raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen") - - logger.info(f"✅ AI analyse gennemført: {result}") - return result - - except Exception as e: - logger.error(f"❌ AI analyse fejlede: {e}") - raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}") - - -@router.post("/supplier-invoices/templates") -async def create_template(request: Dict): - """ - Opret ny template - - Request body: - { - "vendor_id": 1, - "template_name": "Test Template", - "detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}], - "field_mappings": {"invoice_number": {"pattern": "Nummer\\s*(\\d+)", "group": 1}} - } - """ - try: - import json - - vendor_id = request.get('vendor_id') - template_name = request.get('template_name') - detection_patterns = request.get('detection_patterns', []) - field_mappings = request.get('field_mappings', {}) - - if not vendor_id or not template_name: - raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet") - - # Insert template and get template_id - query = """ - INSERT INTO supplier_invoice_templates - (vendor_id, template_name, detection_patterns, field_mappings) - VALUES (%s, %s, %s, %s) - RETURNING template_id - """ - result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings))) - template_id = result[0]['template_id'] if result else None - - if not template_id: - raise HTTPException(status_code=500, detail="Kunne ikke oprette 
template") - - # Reload templates in cache - template_service.reload_templates() - - logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}") - return {"template_id": template_id, "message": "Template oprettet"} - except HTTPException: - raise - except Exception as e: - logger.error(f"❌ Failed to create template: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=str(e)) - - @router.put("/supplier-invoices/templates/{template_id}") async def update_template( template_id: int, @@ -1194,7 +2010,7 @@ async def test_template(template_id: int, request: Dict): field_mappings = template.get('field_mappings', {}) # Test detection patterns - total_score = 0.0 + total_score = 0.0 max_score = 0.0 detection_results = [] diff --git a/app/billing/frontend/supplier_invoices.html b/app/billing/frontend/supplier_invoices.html index 4f70170..6e3d5cc 100644 --- a/app/billing/frontend/supplier_invoices.html +++ b/app/billing/frontend/supplier_invoices.html @@ -1,223 +1,108 @@ - - - - - - Leverandørfakturaer (Kassekladde) - BMC Hub - - - +{% endblock %} - .form-control, .form-select { - border-radius: 8px; - border: 1px solid #dee2e6; - padding: 0.6rem 1rem; - } - - .form-control:focus, .form-select:focus { - border-color: var(--accent); - box-shadow: 0 0 0 0.2rem rgba(15, 76, 117, 0.15); - } - - .line-item { - background: var(--accent-light); - padding: 1rem; - border-radius: 8px; - margin-bottom: 0.5rem; - } - - .filter-pills { - display: flex; - gap: 0.5rem; - flex-wrap: wrap; - margin-bottom: 1rem; - } - - .filter-pill { - padding: 0.5rem 1rem; - border-radius: 20px; - border: 1px solid #dee2e6; - background: var(--bg-card); - cursor: pointer; - transition: all 0.2s; - font-size: 0.9rem; - } - - .filter-pill:hover, .filter-pill.active { - background: var(--accent); - color: white; - border-color: var(--accent); - } - - - - - - - - -
- +{% block content %}
@@ -269,58 +154,123 @@
- -
-
-
-
- Alle -
-
- Afventer -
-
- Godkendt -
-
- Sendt til e-conomic -
-
- Overskredet -
-
-
-
+ + - -
-
-
- - - - - - - - - - - - - - - - - - -
Fakturanr.LeverandørFakturadatoForfaldsdatoBeløbStatuse-conomicHandlinger
-
- Indlæser... -
-
+ +
+ + +
+ + +
+
+
+
+ Alle +
+
+ Afventer +
+
+ Godkendt +
+
+ Sendt til e-conomic +
+
+ Overskredet +
+
+
+ + +
+
+
+ + + + + + + + + + + + + + + + + + +
Fakturanr.LeverandørFakturadatoForfaldsdatoBeløbStatuse-conomicHandlinger
+
+ Indlæser... +
+
+
+
+
+
+ + +
+ + +
+
+
+
📁 Uploadede filer afventer behandling
+ +
+
+ + + + + + + + + + + + + + + + +
FilnavnUpload DatoStatusLeverandørTemplateHandlinger
+
+ Indlæser... +
+
+
+
+
+ +
+
@@ -452,9 +402,10 @@
- - -
Max 50 MB. AI vil udtrække CVR, fakturanummer, beløb og linjer.
+ + +
Max 50 MB pr. fil. Vælg flere filer med Cmd/Ctrl. AI vil udtrække CVR, fakturanummer, beløb og linjer.
+
@@ -482,7 +433,226 @@
- + + + + + + + + + +{% endblock %} + +{% block extra_js %} - - - +{% endblock %} diff --git a/app/billing/frontend/views.py b/app/billing/frontend/views.py index 87c6aaf..c471bae 100644 --- a/app/billing/frontend/views.py +++ b/app/billing/frontend/views.py @@ -3,25 +3,36 @@ Billing Frontend Views Serves HTML pages for billing features """ -from fastapi import APIRouter -from fastapi.responses import FileResponse +from fastapi import APIRouter, Request +from fastapi.templating import Jinja2Templates +from fastapi.responses import HTMLResponse router = APIRouter() +templates = Jinja2Templates(directory="app") -@router.get("/billing/supplier-invoices") -async def supplier_invoices_page(): +@router.get("/billing/supplier-invoices", response_class=HTMLResponse) +async def supplier_invoices_page(request: Request): """Supplier invoices (kassekladde) page""" - return FileResponse("app/billing/frontend/supplier_invoices.html") + return templates.TemplateResponse("billing/frontend/supplier_invoices.html", { + "request": request, + "title": "Kassekladde" + }) -@router.get("/billing/template-builder") -async def template_builder_page(): +@router.get("/billing/template-builder", response_class=HTMLResponse) +async def template_builder_page(request: Request): """Template builder for supplier invoice extraction""" - return FileResponse("app/billing/frontend/template_builder.html") + return templates.TemplateResponse("billing/frontend/template_builder.html", { + "request": request, + "title": "Template Builder" + }) -@router.get("/billing/templates") -async def templates_list_page(): +@router.get("/billing/templates", response_class=HTMLResponse) +async def templates_list_page(request: Request): """Templates list and management page""" - return FileResponse("app/billing/frontend/templates_list.html") + return templates.TemplateResponse("billing/frontend/templates_list.html", { + "request": request, + "title": "Templates" + }) diff --git a/app/core/config.py b/app/core/config.py 
index ec54483..9574d5a 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -37,6 +37,9 @@ class Settings(BaseSettings): OLLAMA_ENDPOINT: str = "http://ai_direct.cs.blaahund.dk" OLLAMA_MODEL: str = "qwen2.5:3b" # Hurtigere model til JSON extraction + # Company Info + OWN_CVR: str = "29522790" # BMC Denmark ApS - ignore when detecting vendors + # File Upload UPLOAD_DIR: str = "uploads" MAX_FILE_SIZE_MB: int = 50 diff --git a/app/services/ollama_service.py b/app/services/ollama_service.py index e4738b2..598d60c 100644 --- a/app/services/ollama_service.py +++ b/app/services/ollama_service.py @@ -27,37 +27,52 @@ class OllamaService: def _build_system_prompt(self) -> str: """Build Danish system prompt for invoice extraction with CVR""" - return """Du er en ekspert i at læse og udtrække strukturerede data fra danske fakturaer og leverandørdokumenter. + return """Du er en ekspert i at læse og udtrække strukturerede data fra danske fakturaer, kreditnotaer og leverandørdokumenter. VIGTIGE REGLER: 1. Returner KUN gyldig JSON - ingen forklaring eller ekstra tekst 2. Hvis et felt ikke findes, sæt det til null 3. Beregn confidence baseret på hvor sikker du er på hvert felt (0.0-1.0) 4. Datoer skal være i format YYYY-MM-DD -5. Tal skal være decimaler (brug . som decimalseparator) +5. DANSKE PRISFORMATER: + - Tusind-separator kan være . (punkt) eller mellemrum: "5.965,18" eller "5 965,18" + - Decimal-separator er , (komma): "1.234,56 kr" + - I JSON output skal du bruge . (punkt) som decimal: 1234.56 + - Eksempel: "5.965,18 kr" → 5965.18 i JSON + - Eksempel: "1.234,56 DKK" → 1234.56 i JSON 6. CVR-nummer skal være 8 cifre uden mellemrum 7. Moms/VAT skal udtrækkes fra hver linje hvis muligt +8. DOKUMENTTYPE DETEKTION: + - "invoice" = Almindelig faktura + - "credit_note" = Kreditnota (refusion, tilbagebetaling, korrektion) + - Kig efter ord som: "Kreditnota", "Credit Note", "Refusion", "Tilbagebetaling", "Godtgørelse" +9. 
BELØB OG FORTEGN (ABSOLUT KRITISK): + - **ALMINDELIGE FAKTURAER**: Alle beløb skal være POSITIVE tal (total_amount > 0, line_total > 0) + - **KREDITNOTAER**: Alle beløb skal være NEGATIVE tal (total_amount < 0, line_total < 0) + - Hvis dokumentet siger "Faktura" → document_type: "invoice" → POSITIVE beløb + - Hvis dokumentet siger "Kreditnota" → document_type: "credit_note" → NEGATIVE beløb JSON format skal være: { - "document_type": "invoice", - "invoice_number": "fakturanummer", + "document_type": "invoice" eller "credit_note", + "invoice_number": "fakturanummer eller kreditnota nummer", "vendor_name": "leverandør firmanavn", "vendor_cvr": "12345678", "invoice_date": "YYYY-MM-DD", "due_date": "YYYY-MM-DD", "currency": "DKK", - "total_amount": 1234.56, - "vat_amount": 123.45, + "total_amount": 1234.56 (NEGATIVT for kreditnotaer), + "vat_amount": 123.45 (NEGATIVT for kreditnotaer), + "original_invoice_reference": "reference til original faktura (kun for kreditnotaer)", "lines": [ { "line_number": 1, "description": "beskrivelse af varen/ydelsen", "quantity": antal_som_tal, - "unit_price": pris_per_stk, - "line_total": total_for_linjen, + "unit_price": pris_per_stk (NEGATIVT for kreditnotaer), + "line_total": total_for_linjen (NEGATIVT for kreditnotaer), "vat_rate": 25.00, - "vat_amount": moms_beløb, + "vat_amount": moms_beløb (NEGATIVT for kreditnotaer), "confidence": 0.0_til_1.0 } ], @@ -65,24 +80,48 @@ JSON format skal være: "raw_text_snippet": "første 200 tegn fra dokumentet" } -EKSEMPEL: -Input: "FAKTURA 2025-001\\nGlobalConnect A/S\\nCVR: 12345678\\n1 stk Fiber 100/100 Mbit @ 299,00 DKK\\nMoms (25%): 74,75 DKK\\nTotal: 373,75 DKK" +EKSEMPEL PÅ FAKTURA (POSITIVE BELØB): +Input: "FAKTURA 2025-001\\nGlobalConnect A/S\\nCVR: 12345678\\n1 stk iPhone 16 @ 5.965,18 DKK\\nMoms (25%): 1.491,30 DKK\\nTotal: 7.456,48 DKK" Output: { "document_type": "invoice", "invoice_number": "2025-001", "vendor_name": "GlobalConnect A/S", "vendor_cvr": "12345678", - "total_amount": 
373.75, - "vat_amount": 74.75, + "total_amount": 7456.48, + "vat_amount": 1491.30, "lines": [{ "line_number": 1, - "description": "Fiber 100/100 Mbit", + "description": "iPhone 16", "quantity": 1, - "unit_price": 299.00, - "line_total": 299.00, + "unit_price": 5965.18, + "line_total": 5965.18, "vat_rate": 25.00, - "vat_amount": 74.75, + "vat_amount": 1491.30, + "confidence": 0.95 + }], + "confidence": 0.95 +} + +EKSEMPEL PÅ KREDITNOTA (NEGATIVE BELØB): +Input: "KREDITNOTA CN-2025-042\\nGlobalConnect A/S\\nCVR: 12345678\\nReference: Faktura 2025-001\\nTilbagebetaling:\\n1 stk iPhone 16 returneret @ -5.965,18 DKK\\nMoms (25%): -1.491,30 DKK\\nTotal: -7.456,48 DKK" + +Output: { + "document_type": "credit_note", + "invoice_number": "CN-2025-042", + "vendor_name": "GlobalConnect A/S", + "vendor_cvr": "12345678", + "original_invoice_reference": "2025-001", + "total_amount": -7456.48, + "vat_amount": -1491.30, + "lines": [{ + "line_number": 1, + "description": "iPhone 16 returneret", + "quantity": 1, + "unit_price": -5965.18, + "line_total": -5965.18, + "vat_rate": 25.00, + "vat_amount": -1491.30, "confidence": 0.95 }], "confidence": 0.95 @@ -99,11 +138,8 @@ Output: { Extracted data as dict with CVR, invoice number, amounts, etc. """ - # Truncate text if too long (keep first 4000 chars) - if len(text) > 4000: - text = text[:4000] + "\\n[... tekst afkortet ...]" - - prompt = f"{self.system_prompt}\\n\\nNU SKAL DU UDTRÆKKE DATA FRA DENNE FAKTURA:\\n{text}\\n\\nReturner kun gyldig JSON:" + # No truncation - send full text to AI + prompt = f"{self.system_prompt}\n\nNU SKAL DU UDTRÆKKE DATA FRA DENNE FAKTURA:\n{text}\n\nReturner kun gyldig JSON:" logger.info(f"🤖 Extracting invoice data from text (length: {len(text)})") @@ -136,6 +172,48 @@ Output: { # Parse JSON from response extraction = self._parse_json_response(raw_response) + # CRITICAL: Fix amount signs based on document_type + # LLM sometimes returns negative amounts for invoices - fix this! 
+ document_type = extraction.get('document_type', 'invoice') + + if document_type == 'invoice': + # Normal invoices should have POSITIVE amounts + if extraction.get('total_amount') and extraction['total_amount'] < 0: + logger.warning(f"⚠️ Fixing negative total_amount for invoice: {extraction['total_amount']} → {abs(extraction['total_amount'])}") + extraction['total_amount'] = abs(extraction['total_amount']) + + if extraction.get('vat_amount') and extraction['vat_amount'] < 0: + extraction['vat_amount'] = abs(extraction['vat_amount']) + + # Fix line totals + if 'lines' in extraction: + for line in extraction['lines']: + if line.get('unit_price') and line['unit_price'] < 0: + line['unit_price'] = abs(line['unit_price']) + if line.get('line_total') and line['line_total'] < 0: + line['line_total'] = abs(line['line_total']) + if line.get('vat_amount') and line['vat_amount'] < 0: + line['vat_amount'] = abs(line['vat_amount']) + + elif document_type == 'credit_note': + # Credit notes should have NEGATIVE amounts + if extraction.get('total_amount') and extraction['total_amount'] > 0: + logger.warning(f"⚠️ Fixing positive total_amount for credit_note: {extraction['total_amount']} → {-abs(extraction['total_amount'])}") + extraction['total_amount'] = -abs(extraction['total_amount']) + + if extraction.get('vat_amount') and extraction['vat_amount'] > 0: + extraction['vat_amount'] = -abs(extraction['vat_amount']) + + # Fix line totals + if 'lines' in extraction: + for line in extraction['lines']: + if line.get('unit_price') and line['unit_price'] > 0: + line['unit_price'] = -abs(line['unit_price']) + if line.get('line_total') and line['line_total'] > 0: + line['line_total'] = -abs(line['line_total']) + if line.get('vat_amount') and line['vat_amount'] > 0: + line['vat_amount'] = -abs(line['vat_amount']) + # Add raw response for debugging extraction['_raw_llm_response'] = raw_response @@ -237,18 +315,22 @@ Output: { raise async def _extract_text_from_pdf(self, file_path: Path) -> 
str: - """Extract text from PDF using PyPDF2""" + """Extract text from PDF using pdfplumber (better table/layout support)""" try: - from PyPDF2 import PdfReader + import pdfplumber - reader = PdfReader(file_path) - text = "" + all_text = [] + with pdfplumber.open(file_path) as pdf: + for page_num, page in enumerate(pdf.pages): + # Strategy: Use regular text extraction (includes tables) + # pdfplumber's extract_text() handles tables better than PyPDF2 + page_text = page.extract_text(layout=True, x_tolerance=2, y_tolerance=2) + + if page_text: + all_text.append(page_text) - for page_num, page in enumerate(reader.pages): - page_text = page.extract_text() - text += f"\\n--- Side {page_num + 1} ---\\n{page_text}" - - logger.info(f"📄 Extracted {len(text)} chars from PDF with {len(reader.pages)} pages") + text = "\\n".join(all_text) + logger.info(f"📄 Extracted {len(text)} chars from PDF with pdfplumber") return text except Exception as e: diff --git a/app/settings/backend/router.py b/app/settings/backend/router.py index f5a8361..ab5c1a3 100644 --- a/app/settings/backend/router.py +++ b/app/settings/backend/router.py @@ -237,3 +237,29 @@ async def reset_user_password(user_id: int, new_password: str): logger.info(f"✅ Reset password for user: {user_id}") return {"message": "Password reset successfully"} + + +# AI Prompts Endpoint +@router.get("/ai-prompts", tags=["Settings"]) +async def get_ai_prompts(): + """Get all AI prompts used in the system""" + from app.services.ollama_service import OllamaService + + ollama_service = OllamaService() + + prompts = { + "invoice_extraction": { + "name": "Faktura Udtrækning (Invoice Extraction)", + "description": "System prompt brugt til at udtrække data fra fakturaer og kreditnotaer via Ollama LLM", + "model": ollama_service.model, + "endpoint": ollama_service.endpoint, + "prompt": ollama_service._build_system_prompt(), + "parameters": { + "temperature": 0.1, + "top_p": 0.9, + "num_predict": 2000 + } + } + } + + return prompts diff 
--git a/app/settings/frontend/settings.html b/app/settings/frontend/settings.html index e2b5a83..f49799e 100644 --- a/app/settings/frontend/settings.html +++ b/app/settings/frontend/settings.html @@ -89,6 +89,9 @@ Brugere + + AI Prompts + System @@ -177,6 +180,23 @@
+ +
+
+
+ AI System Prompts +
+

+ Her kan du se de prompts der bruges til forskellige AI funktioner i systemet. +

+
+
+
+
+
+
+
+
@@ -459,6 +479,76 @@ function getInitials(name) { return name.split(' ').map(word => word[0]).join('').substring(0, 2).toUpperCase(); } +// Load AI Prompts +async function loadAIPrompts() { + try { + const response = await fetch('/api/v1/ai-prompts'); + const prompts = await response.json(); + + const container = document.getElementById('aiPromptsContent'); + container.innerHTML = Object.entries(prompts).map(([key, prompt]) => ` +
+
+
+
+
${escapeHtml(prompt.name)}
+ ${escapeHtml(prompt.description)} +
+ +
+
+
+
+
+ Model: +
${escapeHtml(prompt.model)}
+
+
+ Endpoint: +
${escapeHtml(prompt.endpoint)}
+
+
+ Parametre: +
${JSON.stringify(prompt.parameters)}
+
+
+
+ System Prompt: +
${escapeHtml(prompt.prompt)}
+
+
+
+ `).join(''); + + } catch (error) { + console.error('Error loading AI prompts:', error); + document.getElementById('aiPromptsContent').innerHTML = + '
Kunne ikke indlæse AI prompts
'; + } +} + +function copyPrompt(key) { + const promptElement = document.getElementById(`prompt_${key}`); + const text = promptElement.textContent; + + navigator.clipboard.writeText(text).then(() => { + // Show success feedback + const btn = event.target.closest('button'); + const originalHtml = btn.innerHTML; + btn.innerHTML = 'Kopieret!'; + btn.classList.remove('btn-outline-primary'); + btn.classList.add('btn-success'); + + setTimeout(() => { + btn.innerHTML = originalHtml; + btn.classList.remove('btn-success'); + btn.classList.add('btn-outline-primary'); + }, 2000); + }); +} + function escapeHtml(text) { const div = document.createElement('div'); div.textContent = text; @@ -495,6 +585,8 @@ document.querySelectorAll('.settings-nav .nav-link').forEach(link => { // Load data for tab if (tab === 'users') { loadUsers(); + } else if (tab === 'ai-prompts') { + loadAIPrompts(); } }); }); diff --git a/app/shared/frontend/base.html b/app/shared/frontend/base.html index 4456cbe..c47df0a 100644 --- a/app/shared/frontend/base.html +++ b/app/shared/frontend/base.html @@ -451,26 +451,41 @@ } }); - // Global Search Modal (Cmd+K) - const searchModal = new bootstrap.Modal(document.getElementById('globalSearchModal')); - const searchInput = document.getElementById('globalSearchInput'); - - // Keyboard shortcut: Cmd+K or Ctrl+K - document.addEventListener('keydown', (e) => { - if ((e.metaKey || e.ctrlKey) && e.key === 'k') { - e.preventDefault(); - searchModal.show(); - setTimeout(() => { - searchInput.focus(); - loadLiveStats(); - loadRecentActivity(); - }, 300); - } + // Global Search Modal (Cmd+K) - Initialize after DOM is ready + document.addEventListener('DOMContentLoaded', () => { + const searchModal = new bootstrap.Modal(document.getElementById('globalSearchModal')); + const searchInput = document.getElementById('globalSearchInput'); - // ESC to close - if (e.key === 'Escape') { - searchModal.hide(); - } + // Keyboard shortcut: Cmd+K or Ctrl+K + 
document.addEventListener('keydown', (e) => { + if ((e.metaKey || e.ctrlKey) && e.key === 'k') { + e.preventDefault(); + console.log('Cmd+K pressed - opening search modal'); // Debug + searchModal.show(); + setTimeout(() => { + searchInput.focus(); + loadLiveStats(); + loadRecentActivity(); + }, 300); + } + + // ESC to close + if (e.key === 'Escape') { + searchModal.hide(); + } + }); + + // Reset search when modal is closed + document.getElementById('globalSearchModal').addEventListener('hidden.bs.modal', () => { + searchInput.value = ''; + selectedEntity = null; + document.getElementById('emptyState').style.display = 'block'; + document.getElementById('workflowActions').style.display = 'none'; + document.getElementById('crmResults').style.display = 'none'; + document.getElementById('supportResults').style.display = 'none'; + if (document.getElementById('salesResults')) document.getElementById('salesResults').style.display = 'none'; + if (document.getElementById('financeResults')) document.getElementById('financeResults').style.display = 'none'; + }); }); // Load live statistics for the three boxes @@ -742,18 +757,6 @@ `; document.head.appendChild(style); }); - - // Reset search when modal is closed - document.getElementById('globalSearchModal').addEventListener('hidden.bs.modal', () => { - searchInput.value = ''; - selectedEntity = null; - document.getElementById('emptyState').style.display = 'block'; - document.getElementById('workflowActions').style.display = 'none'; - document.getElementById('crmResults').style.display = 'none'; - document.getElementById('supportResults').style.display = 'none'; - if (document.getElementById('salesResults')) document.getElementById('salesResults').style.display = 'none'; - if (document.getElementById('financeResults')) document.getElementById('financeResults').style.display = 'none'; - }); {% block extra_js %}{% endblock %} diff --git a/app/vendors/frontend/vendor_detail.html b/app/vendors/frontend/vendor_detail.html index 
a5d19d0..516c8db 100644 --- a/app/vendors/frontend/vendor_detail.html +++ b/app/vendors/frontend/vendor_detail.html @@ -196,8 +196,31 @@
-
Leverandør Fakturaer
-

Faktura oversigt kommer snart...

+
+
Leverandør Fakturaer
+ 0 +
+
+ + + + + + + + + + + + + + + + +
Fakturanr.DatoForfaldBeløbStatusHandling
+
+
+
@@ -213,6 +236,96 @@
+ + + {% endblock %} {% block extra_js %} @@ -261,14 +374,6 @@ function displayVendor(vendor) { -
-
Prioritet
-
-
- ${vendor.priority} -
-
-
${vendor.economic_supplier_number ? `
e-conomic Leverandør Nr.
@@ -351,6 +456,102 @@ function displayVendor(vendor) {
#${vendor.id}
`; + + // Load invoices + loadVendorInvoices(); +} + +async function loadVendorInvoices() { + try { + const response = await fetch(`/api/v1/supplier-invoices?vendor_id=${vendorId}`); + if (!response.ok) throw new Error('Failed to load invoices'); + + const invoices = await response.json(); + displayInvoices(invoices); + } catch (error) { + console.error('Error loading invoices:', error); + document.getElementById('invoicesTableBody').innerHTML = ` + + + Kunne ikke indlæse fakturaer + + + `; + } +} + +function displayInvoices(invoices) { + const tbody = document.getElementById('invoicesTableBody'); + const count = document.getElementById('invoiceCount'); + + count.textContent = invoices.length; + + if (invoices.length === 0) { + tbody.innerHTML = ` + + + Ingen fakturaer fundet for denne leverandør + + + `; + return; + } + + tbody.innerHTML = invoices.map(invoice => { + const statusClass = getInvoiceStatusClass(invoice.status); + const statusText = getInvoiceStatusText(invoice.status); + + return ` + + ${escapeHtml(invoice.invoice_number)} + ${formatDateShort(invoice.invoice_date)} + ${formatDateShort(invoice.due_date)} + ${formatCurrency(invoice.total_amount, invoice.currency)} + ${statusText} + + + + + + + `; + }).join(''); +} + +function getInvoiceStatusClass(status) { + const classes = { + 'unpaid': 'bg-warning text-dark', + 'paid': 'bg-success', + 'overdue': 'bg-danger', + 'cancelled': 'bg-secondary', + 'pending': 'bg-info' + }; + return classes[status] || 'bg-secondary'; +} + +function getInvoiceStatusText(status) { + const texts = { + 'unpaid': 'Ubetalt', + 'paid': 'Betalt', + 'overdue': 'Forfalden', + 'cancelled': 'Annulleret', + 'pending': 'Afventer' + }; + return texts[status] || status; +} + +function formatDateShort(dateString) { + if (!dateString) return '-'; + const date = new Date(dateString); + return date.toLocaleDateString('da-DK', { day: '2-digit', month: '2-digit', year: 'numeric' }); +} + +function formatCurrency(amount, currency = 'DKK') { + if 
(!amount) return '-'; + return new Intl.NumberFormat('da-DK', { + style: 'currency', + currency: currency + }).format(amount); } function getCategoryIcon(category) { @@ -376,6 +577,74 @@ function getInitials(name) { return name.split(' ').map(word => word[0]).join('').substring(0, 2).toUpperCase(); } +function editVendor() { + // Get current vendor data and populate form + fetch(`/api/v1/vendors/${vendorId}`) + .then(response => response.json()) + .then(vendor => { + document.getElementById('editName').value = vendor.name || ''; + document.getElementById('editCvr').value = vendor.cvr_number || ''; + document.getElementById('editCategory').value = vendor.category || 'Andet'; + document.getElementById('editDomain').value = vendor.domain || ''; + document.getElementById('editEmail').value = vendor.email || ''; + document.getElementById('editPhone').value = vendor.phone || ''; + document.getElementById('editAddress').value = vendor.address || ''; + document.getElementById('editCity').value = vendor.city || ''; + document.getElementById('editEconomicNumber').value = vendor.economic_supplier_number || ''; + document.getElementById('editNotes').value = vendor.notes || ''; + document.getElementById('editIsActive').checked = vendor.is_active; + + new bootstrap.Modal(document.getElementById('editVendorModal')).show(); + }) + .catch(error => { + console.error('Error loading vendor for edit:', error); + alert('Kunne ikke hente leverandør data'); + }); +} + +async function saveVendor() { + try { + const data = { + name: document.getElementById('editName').value.trim(), + cvr_number: document.getElementById('editCvr').value.trim() || null, + category: document.getElementById('editCategory').value, + domain: document.getElementById('editDomain').value.trim() || null, + email: document.getElementById('editEmail').value.trim() || null, + phone: document.getElementById('editPhone').value.trim() || null, + address: document.getElementById('editAddress').value.trim() || null, + city: 
document.getElementById('editCity').value.trim() || null, + economic_supplier_number: document.getElementById('editEconomicNumber').value.trim() || null, + notes: document.getElementById('editNotes').value.trim() || null, + is_active: document.getElementById('editIsActive').checked + }; + + if (!data.name) { + alert('Navn er påkrævet'); + return; + } + + const response = await fetch(`/api/v1/vendors/${vendorId}`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(data) + }); + + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail || 'Kunne ikke gemme ændringer'); + } + + // Close modal and reload vendor + bootstrap.Modal.getInstance(document.getElementById('editVendorModal')).hide(); + await loadVendor(); + alert('✅ Leverandør opdateret!'); + + } catch (error) { + console.error('Error saving vendor:', error); + alert('Fejl: ' + error.message); + } +} + function escapeHtml(text) { const div = document.createElement('div'); div.textContent = text; @@ -393,11 +662,6 @@ function formatDate(dateString) { }); } -function editVendor() { - // TODO: Implement edit modal - alert('Edit funktion kommer snart!'); -} - // Tab navigation document.querySelectorAll('.vertical-nav .nav-link').forEach(link => { link.addEventListener('click', (e) => { diff --git a/app/vendors/frontend/vendors.html b/app/vendors/frontend/vendors.html index 3c7e4c6..66eafe3 100644 --- a/app/vendors/frontend/vendors.html +++ b/app/vendors/frontend/vendors.html @@ -95,14 +95,13 @@ Kontakt Info CVR Kategori - Prioritet Status Handlinger - +
Loading...
@@ -186,10 +185,6 @@ -
- - -
@@ -231,17 +226,25 @@ async function loadVendors() { params.append('category', currentFilter); } + console.log('🔄 Loading vendors from:', `/api/v1/vendors?${params}`); const response = await fetch(`/api/v1/vendors?${params}`); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + const vendors = await response.json(); + console.log('✅ Loaded vendors:', vendors.length); displayVendors(vendors); updatePagination(vendors.length); } catch (error) { - console.error('Error loading vendors:', error); + console.error('❌ Error loading vendors:', error); document.getElementById('vendorsTableBody').innerHTML = ` - Kunne ikke indlæse leverandører + Kunne ikke indlæse leverandører
+ ${error.message} `; } @@ -282,11 +285,6 @@ function displayVendors(vendors) { ${getCategoryIcon(vendor.category)} ${escapeHtml(vendor.category)} - -
- ${vendor.priority} -
- ${vendor.is_active ? 'Aktiv' : 'Inaktiv'} @@ -313,13 +311,6 @@ function getCategoryIcon(category) { return icons[category] || '📦'; } -function getPriorityClass(priority) { - if (priority >= 80) return 'bg-danger text-white'; - if (priority >= 60) return 'bg-warning'; - if (priority >= 40) return 'bg-info'; - return 'bg-secondary text-white'; -} - function getInitials(name) { return name.split(' ').map(word => word[0]).join('').substring(0, 2).toUpperCase(); } @@ -407,10 +398,10 @@ async function createVendor() { } // Search -let searchTimeout; +let vendorSearchTimeout; document.getElementById('searchInput').addEventListener('input', (e) => { - clearTimeout(searchTimeout); - searchTimeout = setTimeout(() => { + clearTimeout(vendorSearchTimeout); + vendorSearchTimeout = setTimeout(() => { searchTerm = e.target.value; currentPage = 0; loadVendors(); diff --git a/migrations/008_credit_notes.sql b/migrations/008_credit_notes.sql new file mode 100644 index 0000000..177b99a --- /dev/null +++ b/migrations/008_credit_notes.sql @@ -0,0 +1,19 @@ +-- Migration 008: Add support for credit notes +-- Add invoice_type column to distinguish between invoices and credit notes + +ALTER TABLE supplier_invoices +ADD COLUMN IF NOT EXISTS invoice_type VARCHAR(20) DEFAULT 'invoice' CHECK (invoice_type IN ('invoice', 'credit_note')); + +-- Update existing records to be 'invoice' type +UPDATE supplier_invoices SET invoice_type = 'invoice' WHERE invoice_type IS NULL; + +-- Add index for filtering by type +CREATE INDEX IF NOT EXISTS idx_supplier_invoices_type ON supplier_invoices(invoice_type); + +-- Add document_type to extractions table +ALTER TABLE extractions +ADD COLUMN IF NOT EXISTS document_type_detected VARCHAR(20) CHECK (document_type_detected IN ('invoice', 'credit_note', 'receipt', 'other')); + +-- Update system prompt context +COMMENT ON COLUMN supplier_invoices.invoice_type IS 'Type of document: invoice or credit_note'; +COMMENT ON COLUMN extractions.document_type_detected IS 
'AI-detected document type from extraction'; diff --git a/requirements.txt b/requirements.txt index bd7a8ae..cc52be7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,6 @@ aiohttp==3.10.10 # AI & Document Processing httpx==0.27.2 PyPDF2==3.0.1 +pdfplumber==0.11.4 pytesseract==0.3.13 Pillow==11.0.0 diff --git a/test_ai_analyze.py b/test_ai_analyze.py new file mode 100644 index 0000000..5ab2b0d --- /dev/null +++ b/test_ai_analyze.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +"""Test AI analyze endpoint med CVR filter""" +import requests +import pdfplumber +import json +from pathlib import Path + +# Extract PDF text +pdf_path = Path("uploads/5082481.pdf") +all_text = [] +with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + page_text = page.extract_text(layout=True, x_tolerance=2, y_tolerance=2) + if page_text: + all_text.append(page_text) + +full_text = "\n".join(all_text) + +# Call AI analyze endpoint +print("🧪 Testing AI analyze endpoint...") +response = requests.post( + 'http://localhost:8000/api/v1/supplier-invoices/ai/analyze', + json={'pdf_text': full_text[:2000]}, # First 2000 chars as in the actual code + headers={'Content-Type': 'application/json'} +) + +print(f"Status: {response.status_code}") + +if response.status_code == 200: + result = response.json() + print(f"\n✅ AI Analysis Result:") + print(f" CVR: {result.get('cvr')}") + print(f" Invoice Number: {result.get('invoice_number')}") + print(f" Date: {result.get('invoice_date')}") + print(f" Total: {result.get('total_amount')}") + print(f" Detection Patterns: {result.get('detection_patterns')}") + + # Check CVR filter + found_cvr = result.get('cvr') + OWN_CVR = "44687369" + if found_cvr == OWN_CVR: + print(f"\n❌ FAIL: AI returned OWN_CVR {OWN_CVR} - filter didn't work!") + elif found_cvr == "29522790": + print(f"\n✅ PASS: AI found correct vendor CVR {found_cvr} (DCS ApS)") + else: + print(f"\n⚠️ WARNING: AI found CVR {found_cvr} - unexpected value") +else: + print(f"❌ Error: 
{response.text}") diff --git a/test_cvr_filter.py b/test_cvr_filter.py new file mode 100644 index 0000000..3a98fdb --- /dev/null +++ b/test_cvr_filter.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""Test CVR filter i AI analyze""" +import pdfplumber +import json +import re +from pathlib import Path + +# Extract PDF text +pdf_path = Path("/app/uploads/5082481.pdf") +all_text = [] +with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + page_text = page.extract_text(layout=True, x_tolerance=2, y_tolerance=2) + if page_text: + all_text.append(page_text) + +full_text = "\n".join(all_text) + +print("=== PDF TEXT (first 1500 chars) ===") +print(full_text[:1500]) +print("\n") + +# Find CVR numbers (support both CVR and DK prefix) +cvr_pattern = r'(?:CVR|Momsnr\.?|DK)[:\s-]*(\d{8})' +cvr_matches = re.findall(cvr_pattern, full_text) +print(f"=== Found CVR numbers: {cvr_matches}") +print() + +# Check for OWN_CVR +OWN_CVR = "44687369" +if OWN_CVR in cvr_matches: + print(f"⚠️ WARNING: Found OWN_CVR {OWN_CVR} - should be filtered!") +else: + print(f"✅ OWN_CVR {OWN_CVR} not in CVR list (good)") + +# Check if we found DCS CVR +DCS_CVR = "29522790" +if DCS_CVR in cvr_matches: + print(f"✅ Found vendor CVR {DCS_CVR} (DCS ApS) - correct!") +else: + print(f"❌ Did NOT find vendor CVR {DCS_CVR}") + +print() +print("=== Expected behavior ===") +print("AI should find CVR 29522790 (DCS ApS)") +print(f"AI should IGNORE CVR {OWN_CVR} (BMC Denmark)")