From 2ed3118c83c90ac77674daff8da60529a7a8295d Mon Sep 17 00:00:00 2001
From: Christian
Date: Mon, 2 Mar 2026 09:01:43 +0100
Subject: [PATCH] fix: exclude OWN_CVR from AI vendor extraction v2.2.25

---
Review notes (this section is not part of the commit message):

* CVR values are reduced to their digits before they are compared with
  OWN_CVR and before the vendor lookup. The previous case-sensitive
  .replace('DK', '') + .strip() let "DK 29 52 27 90", "dk29522790" or
  "29 52 27 90" slip past the own-CVR guard.
* The name fallback now ignores a blank / whitespace-only vendor_name, which
  would otherwise become ILIKE '% %' and match an arbitrary active vendor.
* The prompt rule 4b is omitted when OWN_CVR is configured but blank, so the
  model is never told that an empty string is our CVR.
* NOTE(review): the name fallback reads `vendor_id`, which this hunk only
  assigns inside the CVR branch - confirm it is initialised earlier in
  reprocess_uploaded_file.
* NOTE(review): the fallback for a missing OWN_CVR setting differs between
  the two files ('' vs '29522790') - consider defining it once in settings.
* NOTE(review): leading whitespace of the code/context lines was
  reconstructed from a whitespace-collapsed copy of this patch; re-sync it
  against the target tree (or apply with --ignore-whitespace). The blob ids
  on the "index" lines are those of the original patch.

 VERSION                                  |  2 +-
 app/billing/backend/supplier_invoices.py | 38 +++++++++++++++++++++++++++++++++++++-
 app/services/ollama_service.py           | 13 ++++++++++---
 3 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/VERSION b/VERSION
index 8389c48..05f68fc 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.2.24
+2.2.25
diff --git a/app/billing/backend/supplier_invoices.py b/app/billing/backend/supplier_invoices.py
index cb5239c..e1418bb 100644
--- a/app/billing/backend/supplier_invoices.py
+++ b/app/billing/backend/supplier_invoices.py
@@ -2326,7 +2326,43 @@ async def reprocess_uploaded_file(file_id: int):
 
             extracted_fields = llm_result
             confidence = llm_result.get('confidence', 0.75)
-            
+
+            # Post-process: clear own CVR if AI mistakenly returned it.
+            # Compare digits only so "DK 29 52 27 90" / "dk29522790" are still recognised.
+            extracted_cvr = llm_result.get('vendor_cvr')
+            own_cvr = getattr(settings, 'OWN_CVR', '')
+            cvr_clean = ''.join(ch for ch in str(extracted_cvr or '') if ch.isdigit())
+            own_cvr_clean = ''.join(ch for ch in str(own_cvr or '') if ch.isdigit())
+            if cvr_clean and cvr_clean == own_cvr_clean:
+                logger.warning(f"⚠️ AI returned own CVR ({own_cvr}) as vendor_cvr - clearing it")
+                llm_result['vendor_cvr'] = None
+                extracted_cvr = None
+                cvr_clean = ''
+
+            # Try to find vendor in DB by extracted CVR or name (overrides detected_vendor_id)
+            if cvr_clean:
+                vendor_row = execute_query_single(
+                    "SELECT id FROM vendors WHERE cvr_number = %s AND is_active = true",
+                    (cvr_clean,))
+                if vendor_row:
+                    vendor_id = vendor_row['id']
+                    logger.info(f"✅ Matched vendor by CVR {cvr_clean}: vendor_id={vendor_id}")
+                    execute_update(
+                        "UPDATE incoming_files SET detected_vendor_id = %s WHERE file_id = %s",
+                        (vendor_id, file_id))
+            # A blank name would turn the pattern into '% %' and match an arbitrary vendor.
+            vendor_name = str(llm_result.get('vendor_name') or '').strip()
+            if not vendor_id and vendor_name:
+                vendor_row = execute_query_single(
+                    "SELECT id FROM vendors WHERE name ILIKE %s AND is_active = true ORDER BY id LIMIT 1",
+                    (f"%{vendor_name}%",))
+                if vendor_row:
+                    vendor_id = vendor_row['id']
+                    logger.info(f"✅ Matched vendor by name '{vendor_name}': vendor_id={vendor_id}")
+                    execute_update(
+                        "UPDATE incoming_files SET detected_vendor_id = %s WHERE file_id = %s",
+                        (vendor_id, file_id))
+
             # Store AI extracted data in extractions table
             extraction_id = execute_insert(
                 """INSERT INTO extractions
diff --git a/app/services/ollama_service.py b/app/services/ollama_service.py
index df84467..65beb74 100644
--- a/app/services/ollama_service.py
+++ b/app/services/ollama_service.py
@@ -28,14 +28,21 @@ class OllamaService:
 
     def _build_system_prompt(self) -> str:
         """Build Danish system prompt for invoice extraction with CVR"""
-        return """Du er en ekspert i at læse og udtrække strukturerede data fra danske fakturaer, kreditnotaer og leverandørdokumenter.
+        # A blank OWN_CVR setting must not yield a rule that names an empty CVR.
+        own_cvr = str(getattr(settings, 'OWN_CVR', '29522790') or '').strip()
+        own_cvr_rule = (
+            f"4b. KRITISK - LEVERANDØR CVR: CVR {own_cvr} er VORES eget CVR (køberen/modtageren). "
+            f"Sæt ALDRIG vendor_cvr til {own_cvr}! Leverandørens CVR er CVR-nummeret der hører til "
+            f"firmaet som har SENDT fakturaen (ikke modtageren).\n"
+        ) if own_cvr else ""
+        return ("""Du er en ekspert i at læse og udtrække strukturerede data fra danske fakturaer, kreditnotaer og leverandørdokumenter.
 
 VIGTIGE REGLER:
 1. Returner KUN gyldig JSON - ingen forklaring eller ekstra tekst
 2. Hvis et felt ikke findes, sæt det til null
 3. Beregn confidence baseret på hvor sikker du er på hvert felt (0.0-1.0)
 4. Datoer skal være i format YYYY-MM-DD
-5. DANSKE PRISFORMATER:
+""" + own_cvr_rule + """5. DANSKE PRISFORMATER:
    - Tusind-separator kan være . (punkt) eller mellemrum: "5.965,18" eller "5 965,18"
    - Decimal-separator er , (komma): "1.234,56 kr"
    - I JSON output skal du bruge . (punkt) som decimal: 1234.56
@@ -126,7 +133,7 @@ Output: {
     "confidence": 0.95
   }],
   "confidence": 0.95
-}"""
+}""")
 
     async def extract_from_text(self, text: str) -> Dict:
         """