"""
Supplier Invoices Router - Leverandørfakturaer ( Kassekladde )
Backend API for managing supplier invoices that integrate with e - conomic
"""
from fastapi import APIRouter, HTTPException, UploadFile, File
from typing import List, Dict, Optional
from datetime import datetime, date, timedelta
from decimal import Decimal
from pathlib import Path
from app.core.database import execute_query, execute_insert, execute_update
from app.core.config import settings
from app.services.economic_service import get_economic_service
from app.services.ollama_service import ollama_service
from app.services.template_service import template_service
from app.services.invoice2data_service import get_invoice2data_service
import logging
import os
import re

logger = logging.getLogger(__name__)
router = APIRouter()

def _smart_extract_lines(text: str) -> List[Dict]:
    """
    Universal line extraction using pdfplumber layout mode.
    Tries the pdfplumber columnar format first, then falls back to vendor-specific patterns.
    """
    lines_arr = text.split('\n')
    items = []
    i = 0
    while i < len(lines_arr):
        line = lines_arr[i].strip()

        # Skip empty or header lines
        if not line or re.search(r'(Position|Varenr|Beskrivelse|Antal|Pris|Total|Model)', line, re.IGNORECASE):
            i += 1
            continue

        # Pattern 1: pdfplumber layout mode - "1  95006  Betalingsmetode...  1  41,20  41,20"
        # Whitespace-separated columns: position item_number description quantity unit_price total_price
        # Most specific pattern - try first!
        layout_match = re.match(r'^\s*(\d{1,2})\s+(\d{4,10})\s+(.+?)\s(\d{1,2})\s+([\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$', line)
        if layout_match:
            items.append({
                'line_number': len(items) + 1,
                'position': layout_match.group(1),
                'item_number': layout_match.group(2),
                'description': layout_match.group(3).strip(),
                'quantity': layout_match.group(4),
                'unit_price': layout_match.group(5).replace(' ', '').replace(',', '.'),
                'total_price': layout_match.group(6).replace(' ', '').replace(',', '.'),
                'raw_text': line
            })
            logger.info(f"✅ pdfplumber layout: {layout_match.group(2)} - {layout_match.group(3)[:30]}...")
            i += 1
            continue

        # Pattern 2: ALSO format - "100 48023976 REFURB LENOVO..." (multi-line)
        item_match = re.match(r'^(\d{1,3})\s+(\d{6,})\s+(.+)', line)
        if item_match:
            position = item_match.group(1)
            item_number = item_match.group(2)
            description = item_match.group(3).strip()

            # Find the next line with quantity + prices
            quantity = None
            unit_price = None
            total_price = None
            for j in range(i + 1, min(i + 10, len(lines_arr))):
                price_line = lines_arr[j].strip()
                price_match = re.match(r'^(\d+)\s*(?:ST|stk|pc|pcs)\s+([\d.,]+)\s+([\d.,]+)', price_line, re.IGNORECASE)
                if price_match:
                    quantity = price_match.group(1)
                    unit_price = price_match.group(2).replace(',', '.')
                    total_price = price_match.group(3).replace(',', '.')
                    break

            if quantity and unit_price:
                items.append({
                    'line_number': len(items) + 1,
                    'position': position,
                    'item_number': item_number,
                    'description': description,
                    'quantity': quantity,
                    'unit_price': unit_price,
                    'total_price': total_price,
                    'raw_text': f"{line} ... {quantity} ST {unit_price} {total_price}"
                })
                logger.info(f"✅ ALSO: {item_number} - {description[:30]}...")
                i += 1
                continue

        # Pattern 3: DCS single-line - "195006Betalingsmetode... 141,2041,20" (legacy PyPDF2 format)
        # Position: 1 digit, Item: 4-10 digits, Description starts with letter
        # Prices: Danish format 1-3 digits, comma, 2 decimals (e.g., 41,20 or 619,00)
        # Quantity: 1-2 digits (non-greedy) before first price
        dcs_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+?)(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})$', line)
        if dcs_match:
            items.append({
                'line_number': len(items) + 1,
                'position': dcs_match.group(1),
                'item_number': dcs_match.group(2),
                'description': dcs_match.group(3).strip(),
                'quantity': dcs_match.group(4),
                'unit_price': dcs_match.group(5).replace(',', '.'),
                'total_price': dcs_match.group(6).replace(',', '.'),
                'raw_text': line
            })
            logger.info(f"✅ DCS single-line: {dcs_match.group(2)} - {dcs_match.group(3)[:30]}...")
            i += 1
            continue

        # Pattern 4: DCS multi-line - "2994922511Ubiquiti..." then search for "...USW-FLEX 1619,00619,00" (legacy)
        dcs_multi_match = re.match(r'^(\d)(\d{4,10})([A-Za-z].+)$', line)
        if dcs_multi_match and not re.search(r'KN8|EAN|Model|Position|Varenr|Tekst', line):
            position = dcs_multi_match.group(1)
            item_number = dcs_multi_match.group(2)
            description = dcs_multi_match.group(3).strip()

            # Search next 5 lines for quantity/prices (Danish format 1-3 digits before comma)
            for j in range(1, 6):
                if i + j >= len(lines_arr):
                    break
                price_line = lines_arr[i + j].strip()
                # Match: "S/N: ...USW-FLEX 1619,00619,00" - qty (1-2 digits, non-greedy) + TWO prices
                price_match = re.search(r'(\d{1,2}?)(\d{1,3},\d{2})(\d{1,3},\d{2})\s*$', price_line)
                if price_match:
                    quantity = price_match.group(1)
                    unit_price = price_match.group(2).replace(',', '.')
                    total_price = price_match.group(3).replace(',', '.')
                    items.append({
                        'line_number': len(items) + 1,
                        'position': position,
                        'item_number': item_number,
                        'description': description,
                        'quantity': quantity,
                        'unit_price': unit_price,
                        'total_price': total_price,
                        'raw_text': f"{line} ... {price_line}"
                    })
                    logger.info(f"✅ DCS multi-line: {item_number} - {description[:30]}...")
                    break
            i += 1
            continue

        i += 1

    if items:
        logger.info(f"📦 Multi-line extraction found {len(items)} items")
    else:
        logger.warning("⚠️ Multi-line extraction found no items")
    return items
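
# Illustrative example (not executed), based on Pattern 1 above: a pdfplumber layout line such as
#   "1    95006   Betalingsmetode    1    41,20    41,20"
# would be extracted by _smart_extract_lines as roughly:
#   {'line_number': 1, 'position': '1', 'item_number': '95006',
#    'description': 'Betalingsmetode', 'quantity': '1',
#    'unit_price': '41.20', 'total_price': '41.20', 'raw_text': '...'}
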
# ========== CRUD OPERATIONS ==========
@router.get("/supplier-invoices")
async def list_supplier_invoices(
    status: Optional[str] = None,
    vendor_id: Optional[int] = None,
    overdue_only: bool = False
):
    """
    List all supplier invoices with filtering options

    Args:
        status: Filter by status (pending, approved, sent_to_economic, paid, overdue, cancelled)
        vendor_id: Filter by vendor
        overdue_only: Only show overdue unpaid invoices
    """
    try:
        query = """
            SELECT
                si.*,
                v.name as vendor_full_name,
                v.economic_supplier_number as vendor_economic_id,
                CASE
                    WHEN si.paid_date IS NOT NULL THEN 'paid'
                    WHEN si.due_date < CURRENT_DATE AND si.paid_date IS NULL THEN 'overdue'
                    ELSE si.status
                END as computed_status
            FROM supplier_invoices si
            LEFT JOIN vendors v ON si.vendor_id = v.id
            WHERE 1=1
        """
        params = []
        if status:
            query += " AND si.status = %s"
            params.append(status)
        if vendor_id:
            query += " AND si.vendor_id = %s"
            params.append(vendor_id)
        if overdue_only:
            query += " AND si.due_date < CURRENT_DATE AND si.paid_date IS NULL"
        query += " ORDER BY si.due_date ASC, si.invoice_date DESC"

        invoices = execute_query(query, tuple(params) if params else ())

        # Add lines to each invoice
        for invoice in invoices:
            lines = execute_query(
                "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
                (invoice['id'],)
            )
            invoice['lines'] = lines
        return invoices
    except Exception as e:
        logger.error(f"❌ Failed to list supplier invoices: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/pending-supplier-invoice-files")
async def get_pending_files():
    """Fetch all files awaiting processing, including AI-extracted ones"""
    try:
        # Fetch both pending files AND ai_extracted files
        files = execute_query(
            """SELECT DISTINCT ON (f.file_id)
                f.file_id,
                f.filename,
                f.status,
                f.uploaded_at,
                f.error_message,
                f.template_id,
                f.file_path,
                -- Quick analysis results (available immediately on upload)
                f.detected_cvr,
                f.detected_vendor_id,
                f.detected_document_type,
                f.detected_document_number,
                f.is_own_invoice,
                v_detected.name as detected_vendor_name,
                v_detected.cvr_number as detected_vendor_cvr,
                -- Get vendor info from latest extraction
                ext.vendor_name,
                ext.vendor_cvr,
                ext.vendor_matched_id,
                v.name as matched_vendor_name,
                v.cvr_number as matched_vendor_cvr_number,
                -- Check if already has invoice via latest extraction only
                si.id as existing_invoice_id,
                si.invoice_number as existing_invoice_number
            FROM incoming_files f
            LEFT JOIN vendors v_detected ON v_detected.id = f.detected_vendor_id
            LEFT JOIN LATERAL (
                SELECT extraction_id, file_id, vendor_name, vendor_cvr, vendor_matched_id
                FROM extractions
                WHERE file_id = f.file_id
                ORDER BY created_at DESC
                LIMIT 1
            ) ext ON true
            LEFT JOIN vendors v ON v.id = ext.vendor_matched_id
            LEFT JOIN supplier_invoices si ON si.extraction_id = ext.extraction_id
            WHERE f.status IN ('pending', 'processing', 'failed', 'ai_extracted', 'processed', 'duplicate')
                AND si.id IS NULL  -- Only show files without invoice yet
            ORDER BY f.file_id, f.uploaded_at DESC"""
        )

        # Convert to regular dicts so we can add new keys
        files = [dict(file) for file in files] if files else []

        # Check for invoice2data templates for each file
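        # An invoice2data YAML template is assumed to look roughly like this (illustrative;
        # only 'keywords', 'issuer' and 'fields' are read by this router):
        #   issuer: DCS ApS
        #   keywords: ['29522790', 'DCS ApS']
        #   fields: {invoice_number: ..., date: ..., amount: ...}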
        try:
            from app.services.invoice2data_service import get_invoice2data_service
            invoice2data = get_invoice2data_service()
            logger.info(f"📋 Checking invoice2data templates: {len(invoice2data.templates)} loaded")

            for file in files:
                # Check if there's an invoice2data template for this vendor's CVR
                vendor_cvr = file.get('matched_vendor_cvr_number') or file.get('detected_vendor_cvr') or file.get('vendor_cvr')
                file['has_invoice2data_template'] = False
                logger.debug(f"File {file['file_id']}: CVR={vendor_cvr}")
                if vendor_cvr:
                    # Check all templates for this CVR in keywords
                    for template_name, template_data in invoice2data.templates.items():
                        keywords = template_data.get('keywords', [])
                        logger.debug(f"Template {template_name}: keywords={keywords}")
                        if str(vendor_cvr) in [str(k) for k in keywords]:
                            file['has_invoice2data_template'] = True
                            file['invoice2data_template_name'] = template_name
                            logger.info(f"✅ File {file['file_id']} matched template: {template_name}")
                            break
        except Exception as e:
            logger.error(f"❌ Failed to check invoice2data templates: {e}", exc_info=True)
            # Continue without invoice2data info

        return {"files": files if files else [], "count": len(files) if files else 0}
    except Exception as e:
        logger.error(f"❌ Failed to get pending files: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/supplier-invoices/files/{file_id}/pdf-text")
async def get_file_pdf_text(file_id: int):
    """Fetch the full PDF text of an uploaded file (for the template builder)"""
    try:
        # Get file info
        file_info = execute_query(
            "SELECT file_path, filename FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # Read PDF text
        from pathlib import Path
        file_path = Path(file_info['file_path'])
        if not file_path.exists():
            raise HTTPException(status_code=404, detail=f"Fil ikke fundet på disk: {file_path}")

        pdf_text = await ollama_service._extract_text_from_file(file_path)
        return {
            "file_id": file_id,
            "filename": file_info['filename'],
            "pdf_text": pdf_text
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get PDF text: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/supplier-invoices/files/{file_id}/extracted-data")
async def get_file_extracted_data(file_id: int):
    """Fetch AI-extracted data for an uploaded file"""
    try:
        # Get file info
        file_info = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # Get extraction results if exists
        extraction = execute_query(
            "SELECT * FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,),
            fetchone=True
        )

        # Parse llm_response_json if it exists (from AI or template extraction)
        llm_json_data = None
        if extraction and extraction.get('llm_response_json'):
            import json
            try:
                llm_json_data = json.loads(extraction['llm_response_json']) if isinstance(extraction['llm_response_json'], str) else extraction['llm_response_json']
                logger.info(f"📊 Parsed llm_response_json: invoice_number={llm_json_data.get('invoice_number')}")
            except Exception as e:
                logger.warning(f"⚠️ Failed to parse llm_response_json: {e}")

        # Get extraction lines if exist
        extraction_lines = []
        if extraction:
            extraction_lines = execute_query(
                """SELECT * FROM extraction_lines
                WHERE extraction_id = %s
                ORDER BY line_number""",
                (extraction['extraction_id'],)
            )

        # Read PDF text if needed
        pdf_text = None
        if file_info['file_path']:
            from pathlib import Path
            file_path = Path(file_info['file_path'])
            if file_path.exists():
                pdf_text = await ollama_service._extract_text_from_file(file_path)

        # Format line items for frontend
        formatted_lines = []
        if extraction_lines:
            for line in extraction_lines:
                formatted_lines.append({
                    "description": line.get('description'),
                    "quantity": float(line.get('quantity')) if line.get('quantity') else None,
                    "unit_price": float(line.get('unit_price')) if line.get('unit_price') else None,
                    "vat_rate": float(line.get('vat_rate')) if line.get('vat_rate') else None,
                    "line_total": float(line.get('line_total')) if line.get('line_total') else None,
                    "vat_note": line.get('vat_note')
                })
        elif llm_json_data and llm_json_data.get('lines'):
            # Use lines from LLM JSON response
            for line in llm_json_data['lines']:
                formatted_lines.append({
                    "description": line.get('description'),
                    "quantity": float(line.get('quantity')) if line.get('quantity') else None,
                    "unit_price": float(line.get('unit_price')) if line.get('unit_price') else None,
                    "vat_rate": float(line.get('vat_rate')) if line.get('vat_rate') else None,
                    "line_total": float(line.get('line_total')) if line.get('line_total') else None,
                    "vat_note": line.get('vat_note')
                })

        # Build llm_data response
        llm_data = None
        if llm_json_data:
            # Use invoice_number from LLM JSON (works for both AI and template extraction)
            llm_data = {
                "invoice_number": llm_json_data.get('invoice_number'),
                "invoice_date": llm_json_data.get('invoice_date'),
                "due_date": llm_json_data.get('due_date'),
                "total_amount": float(llm_json_data.get('total_amount')) if llm_json_data.get('total_amount') else None,
                "currency": llm_json_data.get('currency') or 'DKK',
                "document_type": llm_json_data.get('document_type'),
                "lines": formatted_lines
            }
        elif extraction:
            # Fallback to extraction table columns if no LLM JSON
            llm_data = {
                "invoice_number": extraction.get('document_id'),
                "invoice_date": extraction.get('document_date').isoformat() if extraction.get('document_date') else None,
                "due_date": extraction.get('due_date').isoformat() if extraction.get('due_date') else None,
                "total_amount": float(extraction.get('total_amount')) if extraction.get('total_amount') else None,
                "currency": extraction.get('currency') or 'DKK',
                "document_type": extraction.get('document_type'),
                "lines": formatted_lines
            }

        # Get vendor from extraction
        vendor_matched_id = extraction.get('vendor_matched_id') if extraction else None

        return {
            "file_id": file_id,
            "filename": file_info['filename'],
            "status": file_info['status'],
            "uploaded_at": file_info['uploaded_at'],
            "vendor_matched_id": vendor_matched_id,
            "llm_data": llm_data,
            "extraction": extraction,
            "extraction_lines": extraction_lines if extraction_lines else [],
            "pdf_text_preview": pdf_text[:5000] if pdf_text else None
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get extracted data: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/supplier-invoices/files/{file_id}/download")
async def download_pending_file(file_id: int):
    """View PDF in browser"""
    from fastapi.responses import FileResponse
    from pathlib import Path
    try:
        # Get file info
        file_info = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        file_path = Path(file_info['file_path'])
        if not file_path.exists():
            raise HTTPException(status_code=404, detail="Fil findes ikke på disk")

        # Return with inline disposition so browser displays it instead of downloading
        return FileResponse(
            path=str(file_path),
            media_type='application/pdf',
            headers={"Content-Disposition": f"inline; filename={file_info['filename']}"}
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to view file: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/supplier-invoices/files/{file_id}/pdf")
async def get_file_pdf(file_id: int):
    """Get PDF file for viewing (alias for download endpoint)"""
    return await download_pending_file(file_id)

@router.delete("/supplier-invoices/files/{file_id}")
async def delete_pending_file_endpoint(file_id: int):
    """Delete an uploaded file and its related data"""
    import os
    from pathlib import Path
    try:
        # Get file info
        file_info = execute_query(
            "SELECT * FROM incoming_files WHERE file_id = %s",
            (file_id,),
            fetchone=True
        )
        if not file_info:
            raise HTTPException(status_code=404, detail="Fil ikke fundet")

        # Check if already converted to invoice
        invoice_exists = execute_query(
            """SELECT si.id FROM supplier_invoices si
            JOIN extractions e ON si.extraction_id = e.extraction_id
            WHERE e.file_id = %s""",
            (file_id,),
            fetchone=True
        )
        if invoice_exists:
            raise HTTPException(
                status_code=400,
                detail="Kan ikke slette fil - der er allerede oprettet en faktura fra denne fil"
            )

        # Delete from database (cascade will handle extractions)
        execute_update(
            "DELETE FROM incoming_files WHERE file_id = %s",
            (file_id,)
        )

        # Delete physical file
        if file_info['file_path']:
            file_path = Path(file_info['file_path'])
            if file_path.exists():
                os.remove(file_path)
                logger.info(f"🗑️ Deleted file: {file_path}")

        return {"message": "Fil slettet", "file_id": file_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete file: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.patch("/supplier-invoices/files/{file_id}")
async def update_file_status(file_id: int, data: dict):
    """Update the status of an uploaded file"""
    try:
        allowed_statuses = ['pending', 'processing', 'processed', 'ai_extracted', 'completed', 'failed']
        new_status = data.get('status')
        if not new_status or new_status not in allowed_statuses:
            raise HTTPException(status_code=400, detail=f"Ugyldig status. Tilladte: {', '.join(allowed_statuses)}")

        execute_update(
            "UPDATE incoming_files SET status = %s, processed_at = CURRENT_TIMESTAMP WHERE file_id = %s",
            (new_status, file_id)
        )
        logger.info(f"✅ Updated file {file_id} status to {new_status}")
        return {"status": "success", "file_id": file_id, "new_status": new_status}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update file status: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/supplier-invoices/files/{file_id}/link-vendor")
async def link_vendor_to_extraction(file_id: int, data: dict):
    """Link an existing vendor to an extraction"""
    try:
        vendor_id = data.get('vendor_id')
        if not vendor_id:
            raise HTTPException(status_code=400, detail="vendor_id er påkrævet")

        # Verify vendor exists
        vendor = execute_query(
            "SELECT id, name FROM vendors WHERE id = %s",
            (vendor_id,),
            fetchone=True
        )
        if not vendor:
            raise HTTPException(status_code=404, detail=f"Leverandør {vendor_id} ikke fundet")

        # Get latest extraction for this file
        extraction = execute_query(
            "SELECT extraction_id FROM extractions WHERE file_id = %s ORDER BY created_at DESC LIMIT 1",
            (file_id,),
            fetchone=True
        )
        if not extraction:
            raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")

        # Update extraction with vendor match
        execute_update(
            "UPDATE extractions SET vendor_matched_id = %s WHERE extraction_id = %s",
            (vendor_id, extraction['extraction_id'])
        )
        logger.info(f"✅ Linked vendor {vendor['name']} (ID: {vendor_id}) to extraction {extraction['extraction_id']}")
        return {
            "status": "success",
            "vendor_id": vendor_id,
            "vendor_name": vendor['name'],
            "extraction_id": extraction['extraction_id']
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to link vendor: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/supplier-invoices/from-extraction/{file_id}")
async def create_invoice_from_extraction(file_id: int):
    """Create a supplier invoice from extraction data"""
    try:
        # Get latest extraction for this file
        extraction = execute_query(
            """SELECT e.*, v.name as vendor_name
            FROM extractions e
            LEFT JOIN vendors v ON v.id = e.vendor_matched_id
            WHERE e.file_id = %s
            ORDER BY e.created_at DESC
            LIMIT 1""",
            (file_id,),
            fetchone=True
        )
        if not extraction:
            raise HTTPException(status_code=404, detail="Ingen extraction fundet for denne fil")

        # Check if vendor is matched
        if not extraction['vendor_matched_id']:
            raise HTTPException(
                status_code=400,
                detail="Leverandør skal linkes før faktura kan oprettes. Brug 'Link eller Opret Leverandør' først."
            )

        # Check if invoice already exists
        existing = execute_query(
            "SELECT id FROM supplier_invoices WHERE extraction_id = %s",
            (extraction['extraction_id'],),
            fetchone=True
        )
        if existing:
            raise HTTPException(status_code=400, detail="Faktura er allerede oprettet fra denne extraction")

        # Get extraction lines
        lines = execute_query(
            """SELECT * FROM extraction_lines
            WHERE extraction_id = %s
            ORDER BY line_number""",
            (extraction['extraction_id'],)
        )

        # Parse LLM response JSON if it's a string
        import json
        llm_data = extraction.get('llm_response_json')
        if isinstance(llm_data, str):
            try:
                llm_data = json.loads(llm_data)
            except Exception:
                llm_data = {}
        elif not llm_data:
            llm_data = {}

        # Get invoice number and type from LLM data or generate one
        invoice_number = llm_data.get('invoice_number') if llm_data else None
        if not invoice_number:
            invoice_number = f"INV-{file_id}"

        # Detect document type (invoice or credit_note)
        document_type = llm_data.get('document_type', 'invoice') if llm_data else 'invoice'
        invoice_type = 'credit_note' if document_type == 'credit_note' else 'invoice'

        # Get dates - use today as fallback if missing
        from datetime import datetime, timedelta
        invoice_date = extraction.get('document_date')
        if not invoice_date:
            invoice_date = datetime.now().strftime('%Y-%m-%d')
            logger.warning(f"⚠️ No invoice_date found, using today: {invoice_date}")

        due_date = extraction.get('due_date')
        if not due_date:
            # Default to 30 days from invoice date
            inv_date_obj = datetime.strptime(invoice_date, '%Y-%m-%d')
            due_date = (inv_date_obj + timedelta(days=30)).strftime('%Y-%m-%d')
            logger.warning(f"⚠️ No due_date found, using invoice_date + 30 days: {due_date}")

        # Create supplier invoice
        invoice_id = execute_insert(
            """INSERT INTO supplier_invoices (
                vendor_id, invoice_number, invoice_date, due_date,
                total_amount, currency, status, extraction_id, notes, invoice_type
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            RETURNING id""",
            (
                extraction['vendor_matched_id'],
                invoice_number,
                invoice_date,
                due_date,
                extraction['total_amount'],
                extraction['currency'],
                'credited' if invoice_type == 'credit_note' else 'unpaid',
                extraction['extraction_id'],
                f"Oprettet fra AI extraction (file_id: {file_id})",
                invoice_type
            )
        )

        # Create invoice lines
        if lines:
            for line in lines:
                execute_update(
                    """INSERT INTO supplier_invoice_lines (
                        supplier_invoice_id, description, quantity, unit_price,
                        line_total, vat_rate, vat_amount
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s)""",
                    (
                        invoice_id,
                        line['description'],
                        line.get('quantity') or 1,
                        line.get('unit_price') or 0,
                        line.get('line_total') or 0,
                        line.get('vat_rate') or 25.00,  # Default 25% Danish VAT if NULL
                        line.get('vat_amount')
                    )
                )

        # Update file status
        execute_update(
            "UPDATE incoming_files SET status = 'completed' WHERE file_id = %s",
            (file_id,)
        )
        logger.info(f"✅ Created supplier invoice {invoice_id} from extraction {extraction['extraction_id']}")
        return {
            "status": "success",
            "invoice_id": invoice_id,
            "invoice_number": invoice_number,
            "vendor_name": extraction['vendor_name'],
            "total_amount": extraction['total_amount'],
            "currency": extraction['currency']
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to create invoice from extraction: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# ========== TEMPLATE MANAGEMENT (must be before {invoice_id} route) ==========
@router.get("/supplier-invoices/templates")
async def list_templates():
    """Fetch all templates (both database and invoice2data YAML)"""
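    # The returned list mixes database templates (positive template_id) with
    # invoice2data YAML templates, which are reported with template_id = -1 (see below).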
    try:
        # Get database templates
        query = """
            SELECT t.*, v.name as vendor_name
            FROM supplier_invoice_templates t
            LEFT JOIN vendors v ON t.vendor_id = v.id
            WHERE t.is_active = true
            ORDER BY t.created_at DESC
        """
        db_templates = execute_query(query) or []

        # Get invoice2data templates
        invoice2data_service = get_invoice2data_service()
        invoice2data_templates = []
        for template_name, template_data in invoice2data_service.templates.items():
            # Extract vendor CVR from keywords
            vendor_cvr = None
            keywords = template_data.get('keywords', [])
            for keyword in keywords:
                if isinstance(keyword, str) and keyword.isdigit() and len(keyword) == 8:
                    vendor_cvr = keyword
                    break

            # Get vendor info from database if CVR found
            vendor_name = template_data.get('issuer', 'Ukendt')
            vendor_id = None
            if vendor_cvr:
                vendor = execute_query(
                    "SELECT id, name FROM vendors WHERE cvr_number = %s",
                    (vendor_cvr,),
                    fetchone=True
                )
                if vendor:
                    vendor_id = vendor['id']
                    vendor_name = vendor['name']

            invoice2data_templates.append({
                'template_id': -1,  # Negative ID to distinguish from DB templates
                'template_name': f"Invoice2Data: {template_name}",
                'template_type': 'invoice2data',
                'yaml_filename': template_name,
                'vendor_id': vendor_id,
                'vendor_name': vendor_name,
                'vendor_cvr': vendor_cvr,
                'default_product_category': template_data.get('default_product_category', 'varesalg'),
                'default_product_group_number': template_data.get('default_product_group_number', 1),
                'usage_count': 0,  # Could track this separately
                'is_active': True,
                'detection_patterns': keywords,
                'field_mappings': template_data.get('fields', {}),
                'created_at': None
            })

        # Combine both types
        all_templates = db_templates + invoice2data_templates
        return all_templates
    except Exception as e:
        logger.error(f"❌ Failed to list templates: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/supplier-invoices/templates/{template_id}")
async def get_template(template_id: int):
    """Fetch a specific template with vendor info"""
    try:
        query = """
            SELECT t.*, v.name as vendor_name, v.cvr_number as vendor_cvr
            FROM supplier_invoice_templates t
            LEFT JOIN vendors v ON t.vendor_id = v.id
            WHERE t.template_id = %s AND t.is_active = true
        """
        template = execute_query(query, (template_id,), fetchone=True)
        if not template:
            raise HTTPException(status_code=404, detail="Template not found")
        return template
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get template {template_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/supplier-invoices/search-vendor")
async def search_vendor_by_info(request: Dict):
    """
    Search for a vendor by name or CVR, or optionally create a new one

    Request body:
    {
        "vendor_name": "DCS ApS",
        "vendor_cvr": "12345678",
        "vendor_address": "Vej 1, 2000 By",
        "create_if_missing": true
    }
    """
    try:
        vendor_name = request.get('vendor_name')
        vendor_cvr = request.get('vendor_cvr')
        vendor_address = request.get('vendor_address')
        create_if_missing = request.get('create_if_missing', False)

        # Search by CVR first (most accurate)
        if vendor_cvr:
            vendor = execute_query(
                "SELECT id, name, cvr_number FROM vendors WHERE cvr_number = %s",
                (vendor_cvr,),
                fetchone=True
            )
            if vendor:
                return {
                    "found": True,
                    "vendor_id": vendor['id'],
                    "vendor_name": vendor['name'],
                    "source": "cvr_match"
                }

        # Search by name (fuzzy)
        if vendor_name:
            vendors = execute_query(
                "SELECT id, name, cvr_number FROM vendors WHERE LOWER(name) LIKE LOWER(%s) LIMIT 5",
                (f"%{vendor_name}%",)
            )
            if vendors:
                return {
                    "found": True,
                    "matches": vendors,
                    "source": "name_search",
                    "message": "Flere mulige matches - vælg en eller opret ny"
                }

        # Create new vendor if requested
        if create_if_missing and vendor_name:
            from app.core.config import settings

            # Validate not creating vendor with own CVR
            if vendor_cvr and settings.OWN_CVR in vendor_cvr:
                raise HTTPException(
                    status_code=400,
                    detail=f"Kan ikke oprette vendor med eget CVR ({settings.OWN_CVR})"
                )

            new_vendor_id = execute_insert(
                """INSERT INTO vendors (name, cvr_number, address, created_at)
                VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""",
                (vendor_name, vendor_cvr, vendor_address)
            )
            logger.info(f"✅ Created new vendor: {vendor_name} (ID: {new_vendor_id})")
            return {
                "found": False,
                "created": True,
                "vendor_id": new_vendor_id,
                "vendor_name": vendor_name,
                "source": "newly_created"
            }

        return {
            "found": False,
            "message": "Ingen vendor fundet - angiv create_if_missing=true for at oprette"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Vendor search failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/supplier-invoices/ai/analyze")
async def ai_analyze_invoice(request: Dict):
    """Use AI to analyse an invoice and suggest template fields"""
    try:
        pdf_text = request.get('pdf_text', '')
        vendor_id = request.get('vendor_id')
        if not pdf_text:
            raise HTTPException(status_code=400, detail="Ingen PDF tekst angivet")

        # Build enhanced PDF text with instruction
        from app.core.config import settings
        enhanced_text = f"""OPGAVE: Analyser denne danske faktura og udtræk information til template-generering.
RETURNER KUN VALID JSON - ingen forklaring, ingen markdown, kun ren JSON!

REQUIRED STRUKTUR (alle felter skal med):
{{
    "invoice_number": "5082481",
    "invoice_date": "24/10-25",
    "total_amount": "1471.20",
    "cvr": "29522790",
    "detection_patterns": ["DCS ApS", "WWW.DCS.DK", "Høgemosevænget"],
    "lines_start": "Nr.VarenrTekst",
    "lines_end": "Subtotal"
}}

FIND FØLGENDE:
1. invoice_number: Fakturanummer (efter "Nummer", "Faktura nr", "Invoice")
2. invoice_date: Dato (format DD/MM-YY eller DD-MM-YYYY)
3. total_amount: Total beløb
   - Søg efter "Total", "I alt", "Totalbeløb"
   - Hvis beløbet er på næste linje, match sidste tal
   - Format: [\\d.,]+ (f.eks. 1.471,20 eller 1471.20)
4. cvr: CVR nummer (8 cifre efter "CVR", "Momsnr", "DK")
   - IGNORER CVR {settings.OWN_CVR} - dette er KØBERS CVR, ikke leverandør!
   - Find LEVERANDØRENS CVR (normalt i toppen/header)
5. detection_patterns: 3-5 UNIKKE tekststrenge der identificerer leverandøren
   - Leverandørens navn (f.eks. "DCS ApS", "ALSO A/S")
   - Website eller email (f.eks. "WWW.DCS.DK")
   - Adresse element (f.eks. "Høgemosevænget", "Mårkærvej")
   - UNDGÅ generiske ord som "Faktura", "Danmark", "Side"
6. lines_start: Tekst LIGE FØR varelinjer (f.eks. "Nr.VarenrTekst", "Position Varenr")
7. lines_end: Tekst EFTER varelinjer (f.eks. "Subtotal", "I alt", "Side 1 af")

VIGTIGT:
- detection_patterns SKAL være mindst 3 specifikke tekststrenge
- Vælg tekststrenge der er UNIKKE for denne leverandør
- CVR SKAL være leverandørens - IKKE {settings.OWN_CVR} (det er køber)
- LAD VÆRE med at lave patterns eller line_item regex - kun udtræk rå data

PDF TEKST:
{pdf_text[:2000]}

RETURNER KUN JSON - intet andet!"""

        # Call Ollama
        logger.info(f"🤖 Starter AI analyse af {len(pdf_text)} tegn PDF tekst")
        result = await ollama_service.extract_from_text(enhanced_text)
        if not result:
            raise HTTPException(status_code=500, detail="AI kunne ikke analysere fakturaen")

        logger.info(f"✅ AI analyse gennemført: {result}")
        return result
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ AI analyse fejlede: {e}")
        raise HTTPException(status_code=500, detail=f"AI analyse fejlede: {str(e)}")

@router.post("/supplier-invoices/templates")
async def create_template(request: Dict):
    """
    Create a new template

    Request body:
    {
        "vendor_id": 1,
        "template_name": "Test Template",
        "detection_patterns": [{"type": "text", "pattern": "BMC Denmark", "weight": 0.5}],
        "field_mappings": {"invoice_number": {"pattern": "Nummer\\s*(\\d+)", "group": 1}}
    }
    """
    try:
        import json
        from app.core.config import settings

        vendor_id = request.get('vendor_id')
        template_name = request.get('template_name')
        detection_patterns = request.get('detection_patterns', [])
        field_mappings = request.get('field_mappings', {})
        default_product_category = request.get('default_product_category', 'varesalg')

        if not vendor_id or not template_name:
            raise HTTPException(status_code=400, detail="vendor_id og template_name er påkrævet")

        # Validate that vendor CVR is not own company
        vendor_cvr_mapping = field_mappings.get('vendor_cvr', {})
        if vendor_cvr_mapping:
            # Extract CVR value from pattern or value field
            cvr_value = vendor_cvr_mapping.get('value') or vendor_cvr_mapping.get('pattern', '')
            if settings.OWN_CVR in str(cvr_value):
                raise HTTPException(
                    status_code=400,
                    detail=f"CVR {cvr_value} matcher egen virksomhed ({settings.OWN_CVR}). Brug leverandørens CVR, ikke købers!"
                )

        # Insert template and get template_id
        query = """
            INSERT INTO supplier_invoice_templates
                (vendor_id, template_name, detection_patterns, field_mappings, default_product_category)
            VALUES (%s, %s, %s, %s, %s)
            RETURNING template_id
        """
        result = execute_query(query, (vendor_id, template_name, json.dumps(detection_patterns), json.dumps(field_mappings), default_product_category))
        template_id = result[0]['template_id'] if result else None
        if not template_id:
            raise HTTPException(status_code=500, detail="Kunne ikke oprette template")

        # Reload templates in cache
        template_service.reload_templates()
        logger.info(f"✅ Template created: {template_name} (ID: {template_id}) for vendor {vendor_id}")
        return {"template_id": template_id, "message": "Template oprettet"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to create template: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/supplier-invoices/{invoice_id}")
async def get_supplier_invoice(invoice_id: int):
    """Get single supplier invoice with lines"""
    try:
        invoice = execute_query(
            """SELECT si.*, v.name as vendor_full_name, v.economic_supplier_number as vendor_economic_id
            FROM supplier_invoices si
            LEFT JOIN vendors v ON si.vendor_id = v.id
            WHERE si.id = %s""",
            (invoice_id,),
            fetchone=True
        )
        if not invoice:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # Get lines
        lines = execute_query(
            "SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number",
            (invoice_id,)
        )
        invoice['lines'] = lines
        return invoice
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to get supplier invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/supplier-invoices")
async def create_supplier_invoice(data: Dict):
    """
    Create new supplier invoice

    Required fields:
    - invoice_number: str
    - vendor_id: int
    - invoice_date: str (YYYY-MM-DD)
    - total_amount: float

    Optional fields:
    - due_date: str (YYYY-MM-DD) - defaults to invoice_date + 30 days
    - vat_amount: float
    - net_amount: float
    - currency: str (default 'DKK')
    - description: str
    - notes: str
    - lines: List[Dict] with line items
    """
    try:
        # Validate required fields
        required = ['invoice_number', 'vendor_id', 'invoice_date', 'total_amount']
        missing = [f for f in required if f not in data]
        if missing:
            raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}")

        # Calculate due_date if not provided (30 days default)
        invoice_date = datetime.fromisoformat(data['invoice_date'])
        due_date = data.get('due_date')
        if not due_date:
            due_date = (invoice_date + timedelta(days=30)).strftime('%Y-%m-%d')

        # Determine invoice type (default to invoice)
        invoice_type = data.get('invoice_type', 'invoice')
        if invoice_type not in ['invoice', 'credit_note']:
            invoice_type = 'invoice'

        # Insert supplier invoice
        invoice_id = execute_insert(
            """INSERT INTO supplier_invoices
                (invoice_number, vendor_id, vendor_name, invoice_date, due_date,
                 total_amount, vat_amount, net_amount, currency, description, notes,
                 status, created_by, invoice_type)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            (
                data['invoice_number'],
                data['vendor_id'],
                data.get('vendor_name'),
                data['invoice_date'],
                due_date,
                data['total_amount'],
                data.get('vat_amount', 0),
                data.get('net_amount', data['total_amount']),
                data.get('currency', 'DKK'),
                data.get('description'),
                data.get('notes'),
                'credited' if invoice_type == 'credit_note' else 'pending',
                data.get('created_by'),
                invoice_type
            )
        )

        # Insert lines if provided
        if data.get('lines'):
            for idx, line in enumerate(data['lines'], start=1):
                # Map vat_code: I52 for reverse charge, I25 for standard
                vat_code = line.get('vat_code', 'I25')
                execute_insert(
                    """INSERT INTO supplier_invoice_lines
                        (supplier_invoice_id, line_number, description, quantity, unit_price,
                         line_total, vat_code, vat_rate, vat_amount, contra_account, sku)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                    (
                        invoice_id,
                        line.get('line_number', idx),
                        line.get('description'),
                        line.get('quantity', 1),
                        line.get('unit_price', 0),
                        line.get('line_total', 0),
                        vat_code,
                        line.get('vat_rate', 25.00),
                        line.get('vat_amount', 0),
                        line.get('contra_account', '5810'),
                        line.get('sku')
                    )
                )

        logger.info(f"✅ Created supplier invoice: {data['invoice_number']} (ID: {invoice_id})")
        return {
            "success": True,
            "invoice_id": invoice_id,
            "invoice_number": data['invoice_number'],
            "due_date": due_date
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to create supplier invoice: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.put("/supplier-invoices/{invoice_id}")
async def update_supplier_invoice(invoice_id: int, data: Dict):
    """Update supplier invoice details"""
    try:
        # Check if invoice exists
        existing = execute_query(
            "SELECT id, status FROM supplier_invoices WHERE id = %s",
            (invoice_id,),
            fetchone=True
        )
        if not existing:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # Don't allow editing if already sent to e-conomic
        if existing['status'] == 'sent_to_economic':
            raise HTTPException(
                status_code=400,
                detail="Cannot edit invoice that has been sent to e-conomic"
            )

        # Build update query dynamically based on provided fields
        update_fields = []
        params = []
        allowed_fields = ['invoice_number', 'vendor_id', 'vendor_name', 'invoice_date',
                          'due_date', 'total_amount', 'vat_amount', 'net_amount',
                          'currency', 'description', 'notes', 'status']
        for field in allowed_fields:
            if field in data:
                update_fields.append(f"{field} = %s")
                params.append(data[field])
        if not update_fields:
            raise HTTPException(status_code=400, detail="No fields to update")

        params.append(invoice_id)
        query = f"""
            UPDATE supplier_invoices
            SET {', '.join(update_fields)}, updated_at = CURRENT_TIMESTAMP
            WHERE id = %s
        """
        execute_update(query, tuple(params))
        logger.info(f"✅ Updated supplier invoice {invoice_id}")
        return {"success": True, "invoice_id": invoice_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update supplier invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.delete("/supplier-invoices/{invoice_id}")
async def delete_supplier_invoice(invoice_id: int):
    """Delete supplier invoice (soft delete if integrated with e-conomic)"""
    try:
        invoice = execute_query(
            "SELECT id, invoice_number, economic_voucher_number FROM supplier_invoices WHERE id = %s",
            (invoice_id,),
            fetchone=True
        )
        if not invoice:
            raise HTTPException(status_code=404, detail=f"Invoice {invoice_id} not found")

        # If sent to e-conomic, only mark as cancelled (don't delete)
        if invoice.get('economic_voucher_number'):
            execute_update(
                "UPDATE supplier_invoices SET status = 'cancelled', updated_at = CURRENT_TIMESTAMP WHERE id = %s",
                (invoice_id,)
            )
            logger.info(f"⚠️ Marked supplier invoice {invoice['invoice_number']} as cancelled (sent to e-conomic)")
            return {"success": True, "message": "Invoice marked as cancelled", "invoice_id": invoice_id}

        # Otherwise, delete invoice and lines
        execute_update("DELETE FROM supplier_invoice_lines WHERE supplier_invoice_id = %s", (invoice_id,))
        execute_update("DELETE FROM supplier_invoices WHERE id = %s", (invoice_id,))
        logger.info(f"🗑️ Deleted supplier invoice {invoice['invoice_number']} (ID: {invoice_id})")
        return {"success": True, "message": "Invoice deleted", "invoice_id": invoice_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to delete supplier invoice {invoice_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# ========== E-CONOMIC INTEGRATION ==========
@router.post ( " /supplier-invoices/ {invoice_id} /approve " )
async def approve_supplier_invoice ( invoice_id : int , approved_by : str ) :
""" Approve supplier invoice for payment """
try :
invoice = execute_query (
" SELECT id, invoice_number, status FROM supplier_invoices WHERE id = %s " ,
( invoice_id , ) ,
fetchone = True
)
if not invoice :
raise HTTPException ( status_code = 404 , detail = f " Invoice { invoice_id } not found " )
if invoice [ ' status ' ] != ' pending ' :
raise HTTPException ( status_code = 400 , detail = f " Invoice is already { invoice [ ' status ' ] } " )
execute_update (
""" UPDATE supplier_invoices
SET status = ' approved ' , approved_by = % s , approved_at = CURRENT_TIMESTAMP
WHERE id = % s """ ,
( approved_by , invoice_id )
)
logger . info ( f " ✅ Approved supplier invoice { invoice [ ' invoice_number ' ] } by { approved_by } " )
return { " success " : True , " invoice_id " : invoice_id , " approved_by " : approved_by }
except HTTPException :
raise
except Exception as e :
logger . error ( f " ❌ Failed to approve invoice { invoice_id } : { e } " )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
@router.post ( " /supplier-invoices/ {invoice_id} /send-to-economic " )
async def send_to_economic ( invoice_id : int ) :
"""
Send approved supplier invoice to e - conomic kassekladde
Creates voucher entry in e - conomic journals
"""
try :
# Get invoice with lines
invoice = execute_query (
""" SELECT si.*, v.economic_supplier_number as vendor_economic_id, v.name as vendor_full_name
FROM supplier_invoices si
LEFT JOIN vendors v ON si . vendor_id = v . id
WHERE si . id = % s """ ,
( invoice_id , ) ,
fetchone = True
)
if not invoice :
raise HTTPException ( status_code = 404 , detail = f " Invoice { invoice_id } not found " )
if invoice [ ' status ' ] != ' approved ' :
raise HTTPException ( status_code = 400 , detail = " Invoice must be approved before sending to e-conomic " )
if invoice . get ( ' economic_voucher_number ' ) :
raise HTTPException ( status_code = 400 , detail = " Invoice already sent to e-conomic " )
# Get lines
lines = execute_query (
" SELECT * FROM supplier_invoice_lines WHERE supplier_invoice_id = %s ORDER BY line_number " ,
( invoice_id , )
)
if not lines :
raise HTTPException ( status_code = 400 , detail = " Invoice must have at least one line item " )
# Check if vendor exists in e-conomic
economic = get_economic_service ( )
vendor_economic_id = invoice . get ( ' vendor_economic_id ' )
# If vendor not in e-conomic, create it
if not vendor_economic_id :
vendor_result = await economic . search_supplier_by_name ( invoice . get ( ' vendor_full_name ' ) or invoice . get ( ' vendor_name ' ) )
if vendor_result :
vendor_economic_id = vendor_result [ ' supplierNumber ' ]
# Update local vendor record
execute_update (
" UPDATE vendors SET economic_supplier_number = %s WHERE id = %s " ,
( vendor_economic_id , invoice [ ' vendor_id ' ] )
)
else :
# Create new supplier in e-conomic
new_supplier = await economic . create_supplier ( {
' name ' : invoice . get ( ' vendor_full_name ' ) or invoice . get ( ' vendor_name ' ) ,
' currency ' : invoice . get ( ' currency ' , ' DKK ' )
} )
if new_supplier and new_supplier . get ( ' supplierNumber ' ) :
vendor_economic_id = new_supplier [ ' supplierNumber ' ]
else :
raise HTTPException ( status_code = 500 , detail = " Failed to create supplier in e-conomic " )
# Get default journal number from settings
journal_setting = execute_query (
" SELECT setting_value FROM supplier_invoice_settings WHERE setting_key = ' economic_default_journal ' " ,
fetchone = True
)
journal_number = int ( journal_setting [ ' setting_value ' ] ) if journal_setting else 1
# Build VAT breakdown from lines
vat_breakdown = { }
line_items = [ ]
for line in lines :
vat_code = line . get ( ' vat_code ' , ' I25 ' )
if vat_code not in vat_breakdown :
vat_breakdown [ vat_code ] = {
' net ' : 0 ,
' vat ' : 0 ,
' gross ' : 0 ,
' rate ' : line . get ( ' vat_rate ' , 25.00 )
}
line_total = float ( line . get ( ' line_total ' , 0 ) )
vat_amount = float ( line . get ( ' vat_amount ' , 0 ) )
net_amount = line_total - vat_amount
vat_breakdown [ vat_code ] [ ' net ' ] + = net_amount
vat_breakdown [ vat_code ] [ ' vat ' ] + = vat_amount
vat_breakdown [ vat_code ] [ ' gross ' ] + = line_total
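# Worked example (assumed figures): a line with line_total=125.00 and vat_amount=25.00
# under vat_code "I25" adds net 100.00, vat 25.00 and gross 125.00 to that bucket.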
line_items . append ( {
' description ' : line . get ( ' description ' ) ,
' quantity ' : float ( line . get ( ' quantity ' , 1 ) ) ,
' unit_price ' : float ( line . get ( ' unit_price ' , 0 ) ) ,
' line_total ' : line_total ,
' vat_code ' : vat_code ,
' vat_amount ' : vat_amount ,
' contra_account ' : line . get ( ' contra_account ' , ' 5810 ' ) ,
' sku ' : line . get ( ' sku ' )
} )
# Send to e-conomic
result = await economic . create_journal_supplier_invoice (
journal_number = journal_number ,
supplier_number = vendor_economic_id ,
invoice_number = invoice [ ' invoice_number ' ] ,
invoice_date = invoice [ ' invoice_date ' ] . isoformat ( ) if isinstance ( invoice [ ' invoice_date ' ] , date ) else invoice [ ' invoice_date ' ] ,
total_amount = float ( invoice [ ' total_amount ' ] ) ,
vat_breakdown = vat_breakdown ,
line_items = line_items ,
due_date = invoice [ ' due_date ' ] . isoformat ( ) if invoice . get ( ' due_date ' ) and isinstance ( invoice [ ' due_date ' ] , date ) else invoice . get ( ' due_date ' ) ,
text = invoice . get ( ' description ' ) or f " Supplier invoice { invoice [ ' invoice_number ' ] } "
)
if result . get ( ' error ' ) :
raise HTTPException ( status_code = 500 , detail = result . get ( ' message ' , ' Failed to create voucher in e-conomic ' ) )
# Update invoice with e-conomic details
execute_update (
""" UPDATE supplier_invoices
SET status = ' sent_to_economic ' ,
economic_supplier_number = % s ,
economic_journal_number = % s ,
economic_voucher_number = % s ,
economic_accounting_year = % s ,
sent_to_economic_at = CURRENT_TIMESTAMP
WHERE id = % s """ ,
(
vendor_economic_id ,
result [ ' journal_number ' ] ,
result [ ' voucher_number ' ] ,
result [ ' accounting_year ' ] ,
invoice_id
)
)
# Upload attachment if file_path exists
if invoice . get ( ' file_path ' ) and os . path . exists ( invoice [ ' file_path ' ] ) :
attachment_result = await economic . upload_voucher_attachment (
journal_number = result [ ' journal_number ' ] ,
accounting_year = result [ ' accounting_year ' ] ,
voucher_number = result [ ' voucher_number ' ] ,
pdf_path = invoice [ ' file_path ' ] ,
filename = f " { invoice [ ' invoice_number ' ] } .pdf "
)
if attachment_result . get ( ' success ' ) :
logger . info ( f " 📎 Uploaded attachment for voucher { result [ ' voucher_number ' ] } " )
logger . info ( f " ✅ Sent supplier invoice { invoice [ ' invoice_number ' ] } to e-conomic (voucher # { result [ ' voucher_number ' ] } ) " )
return {
" success " : True ,
" invoice_id " : invoice_id ,
" voucher_number " : result [ ' voucher_number ' ] ,
" journal_number " : result [ ' journal_number ' ] ,
" accounting_year " : result [ ' accounting_year ' ]
}
except HTTPException :
raise
except Exception as e :
logger . error ( f " ❌ Failed to send invoice { invoice_id } to e-conomic: { e } " )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
@router.get ( " /supplier-invoices/economic/journals " )
async def get_economic_journals ( ) :
""" Get available e-conomic journals (kassekladder) """
try :
economic = get_economic_service ( )
journals = await economic . get_supplier_invoice_journals ( )
return { " journals " : journals }
except Exception as e :
logger . error ( f " ❌ Failed to get e-conomic journals: { e } " )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
# ========== STATISTICS & REPORTS ==========
@router.get ( " /supplier-invoices/stats/overview " )
async def get_payment_overview ( ) :
"""
Get overview of supplier invoices payment status
Returns stats for total , paid , overdue , due soon , and pending invoices
"""
try :
today = date . today ( ) . isoformat ( )
stats = execute_query ( """
SELECT
COUNT ( * ) as total_count ,
SUM ( CASE WHEN paid_date IS NOT NULL THEN 1 ELSE 0 END ) as paid_count ,
SUM ( CASE WHEN paid_date IS NULL AND due_date < % s THEN 1 ELSE 0 END ) as overdue_count ,
SUM ( CASE WHEN paid_date IS NULL AND due_date > = % s AND due_date < = ( % s : : date + INTERVAL ' 7 days ' ) THEN 1 ELSE 0 END ) as due_soon_count ,
SUM ( CASE WHEN paid_date IS NULL AND ( due_date IS NULL OR due_date > ( % s : : date + INTERVAL ' 7 days ' ) ) THEN 1 ELSE 0 END ) as pending_count ,
SUM ( total_amount ) as total_amount ,
SUM ( CASE WHEN paid_date IS NOT NULL THEN total_amount ELSE 0 END ) as paid_amount ,
SUM ( CASE WHEN paid_date IS NULL THEN total_amount ELSE 0 END ) as unpaid_amount ,
SUM ( CASE WHEN paid_date IS NULL AND due_date < % s THEN total_amount ELSE 0 END ) as overdue_amount
FROM supplier_invoices
WHERE status != ' cancelled '
""" , (today, today, today, today, today), fetchone=True)
return {
" total_invoices " : stats . get ( ' total_count ' , 0 ) if stats else 0 ,
" paid_count " : stats . get ( ' paid_count ' , 0 ) if stats else 0 ,
" overdue_count " : stats . get ( ' overdue_count ' , 0 ) if stats else 0 ,
" due_soon_count " : stats . get ( ' due_soon_count ' , 0 ) if stats else 0 ,
" pending_count " : stats . get ( ' pending_count ' , 0 ) if stats else 0 ,
" total_amount " : float ( stats . get ( ' total_amount ' , 0 ) or 0 ) if stats else 0 ,
" paid_amount " : float ( stats . get ( ' paid_amount ' , 0 ) or 0 ) if stats else 0 ,
" unpaid_amount " : float ( stats . get ( ' unpaid_amount ' , 0 ) or 0 ) if stats else 0 ,
" overdue_amount " : float ( stats . get ( ' overdue_amount ' , 0 ) or 0 ) if stats else 0
}
except Exception as e :
logger . error ( f " ❌ Failed to get payment overview: { e } " )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
@router.get ( " /supplier-invoices/stats/by-vendor " )
async def get_stats_by_vendor ( ) :
""" Get supplier invoice statistics grouped by vendor """
try :
stats = execute_query ( """
SELECT
v . id as vendor_id ,
v . name as vendor_name ,
COUNT ( si . id ) as invoice_count ,
SUM ( si . total_amount ) as total_amount ,
SUM ( CASE WHEN si . paid_date IS NULL THEN si . total_amount ELSE 0 END ) as unpaid_amount ,
MAX ( si . due_date ) as latest_due_date
FROM vendors v
LEFT JOIN supplier_invoices si ON v . id = si . vendor_id
WHERE si . status != ' cancelled ' OR si . status IS NULL
GROUP BY v . id , v . name
HAVING COUNT ( si . id ) > 0
ORDER BY unpaid_amount DESC
""" )
return { " vendor_stats " : stats }
except Exception as e :
logger . error ( f " ❌ Failed to get vendor stats: { e } " )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
# ========== UPLOAD & AI EXTRACTION ==========
@router.post ( " /supplier-invoices/upload " )
async def upload_supplier_invoice ( file : UploadFile = File ( . . . ) ) :
"""
Upload supplier invoice ( PDF / image ) and extract data using templates
Process :
1. Validate file type and size
2. Calculate SHA256 checksum for duplicate detection
3. Save file to uploads directory
4. Extract text ( PDF / OCR )
5. Match template based on PDF content
6. Extract fields using template regex patterns
7. Show form with pre - filled data for user review
Returns :
{
" status " : " success|duplicate|needs_review " ,
" file_id " : int ,
" template_matched " : bool ,
" template_id " : int ,
" extracted_fields " : dict ,
" confidence " : float ,
" pdf_text " : str # For manual review
}
"""
from app . core . config import settings
try :
# Validate file extension
suffix = Path ( file . filename ) . suffix . lower ( )
if suffix not in settings . ALLOWED_EXTENSIONS :
raise HTTPException (
status_code = 400 ,
detail = f " Filtype { suffix } ikke tilladt. Tilladte: { ' , ' . join ( settings . ALLOWED_EXTENSIONS ) } "
)
# Create upload directory
upload_dir = Path ( settings . UPLOAD_DIR )
upload_dir . mkdir ( parents = True , exist_ok = True )
# Save file temporarily to calculate checksum
temp_path = upload_dir / f " temp_ { datetime . now ( ) . timestamp ( ) } _ { file . filename } "
try :
# Validate file size while saving
max_size = settings . MAX_FILE_SIZE_MB * 1024 * 1024
total_size = 0
with open ( temp_path , " wb " ) as buffer :
while chunk := await file . read ( 8192 ) :
total_size + = len ( chunk )
if total_size > max_size :
temp_path . unlink ( missing_ok = True )
raise HTTPException (
status_code = 413 ,
detail = f " Fil for stor (max { settings . MAX_FILE_SIZE_MB } MB) "
)
buffer . write ( chunk )
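# Reading in 8 KB chunks keeps memory usage flat for large uploads and lets the
# size check abort the transfer as soon as MAX_FILE_SIZE_MB is exceeded.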
logger . info ( f " 📥 Uploaded file: { file . filename } ( { total_size } bytes) " )
# Calculate SHA256 checksum
checksum = ollama_service . calculate_file_checksum ( temp_path )
# Check for duplicate file
existing_file = execute_query (
" SELECT file_id, status FROM incoming_files WHERE checksum = %s " ,
( checksum , ) ,
fetchone = True
)
if existing_file :
temp_path . unlink ( missing_ok = True )
logger . warning ( f " ⚠️ Duplicate file detected: { checksum [ : 16 ] } ... " )
# Get existing invoice if linked
existing_invoice = execute_query (
""" SELECT si.* FROM supplier_invoices si
JOIN extractions e ON si . extraction_id = e . extraction_id
WHERE e . file_id = % s """ ,
( existing_file [ ' file_id ' ] , ) ,
fetchone = True
)
return {
" status " : " duplicate " ,
" message " : " Denne fil er allerede uploadet " ,
" file_id " : existing_file [ ' file_id ' ] ,
" invoice_id " : existing_invoice [ ' id ' ] if existing_invoice else None
}
# Rename to permanent name
final_path = upload_dir / file . filename
counter = 1
while final_path . exists ( ) :
final_path = upload_dir / f " { final_path . stem } _ { counter } { final_path . suffix } "
counter + = 1
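# e.g. "faktura.pdf" becomes "faktura_1.pdf", then "faktura_2.pdf", until a free name is found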
temp_path . rename ( final_path )
logger . info ( f " 💾 Saved file as: { final_path . name } " )
# Insert file record
file_record = execute_query (
""" INSERT INTO incoming_files
( filename , original_filename , file_path , file_size , mime_type , checksum , status )
VALUES ( % s , % s , % s , % s , % s , % s , ' processing ' ) RETURNING file_id """ ,
( final_path . name , file . filename , str ( final_path ) , total_size ,
ollama_service . _get_mime_type ( final_path ) , checksum ) ,
fetchone = True
)
file_id = file_record [ ' file_id ' ]
# Extract text from file
logger . info ( f " 📄 Extracting text from { final_path . suffix } ... " )
text = await ollama_service . _extract_text_from_file ( final_path )
# QUICK ANALYSIS: Extract CVR, document type, invoice number IMMEDIATELY
logger . info ( f " ⚡ Running quick analysis... " )
quick_result = await ollama_service . quick_analysis_on_upload ( text )
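# quick_result is expected to look roughly like this (values assumed for illustration):
#   {"cvr": "12345678", "vendor_id": 7, "vendor_name": "...", "document_type": "invoice",
#    "document_number": "INV-1001", "is_own_invoice": False}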
# Update file record with quick analysis results
execute_update (
""" UPDATE incoming_files
SET detected_cvr = % s ,
detected_vendor_id = % s ,
detected_document_type = % s ,
detected_document_number = % s ,
is_own_invoice = % s
WHERE file_id = % s """ ,
( quick_result . get ( ' cvr ' ) ,
quick_result . get ( ' vendor_id ' ) ,
quick_result . get ( ' document_type ' ) ,
quick_result . get ( ' document_number ' ) ,
quick_result . get ( ' is_own_invoice ' , False ) ,
file_id )
)
logger . info ( f " 📋 Quick analysis saved: CVR= { quick_result . get ( ' cvr ' ) } , "
f " Vendor= { quick_result . get ( ' vendor_name ' ) } , "
f " Type= { quick_result . get ( ' document_type ' ) } , "
f " Number= { quick_result . get ( ' document_number ' ) } " )
# DUPLICATE CHECK: Check if invoice number already exists
document_number = quick_result . get ( ' document_number ' )
if document_number :
logger . info ( f " 🔍 Checking for duplicate invoice number: { document_number } " )
# Check 1: Search in local database (supplier_invoices table)
existing_invoice = execute_query (
""" SELECT si.id, si.invoice_number, si.created_at, v.name as vendor_name
FROM supplier_invoices si
LEFT JOIN vendors v ON v . id = si . vendor_id
WHERE si . invoice_number = % s
ORDER BY si . created_at DESC
LIMIT 1 """ ,
( document_number , ) ,
fetchone = True
)
if existing_invoice :
# DUPLICATE FOUND IN DATABASE
logger . error ( f " 🚫 DUPLICATE: Invoice { document_number } already exists in database (ID: { existing_invoice [ ' id ' ] } ) " )
# Mark file as duplicate
execute_update (
""" UPDATE incoming_files
SET status = ' duplicate ' ,
error_message = % s ,
processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( f " DUBLET: Fakturanummer { document_number } findes allerede i systemet (Faktura # { existing_invoice [ ' id ' ] } , { existing_invoice [ ' vendor_name ' ] or ' Ukendt leverandør ' } ) " ,
file_id )
)
raise HTTPException (
status_code = 409 , # 409 Conflict
detail = f " 🚫 DUBLET: Fakturanummer { document_number } findes allerede i systemet (Faktura # { existing_invoice [ ' id ' ] } , oprettet { existing_invoice [ ' created_at ' ] . strftime ( ' %d - % m- % Y ' ) } ) "
)
# Check 2: Search in e-conomic (if configured)
from app . services . economic_service import economic_service
if hasattr ( economic_service , ' app_secret_token ' ) and economic_service . app_secret_token :
logger . info ( f " 🔍 Checking e-conomic for invoice number: { document_number } " )
economic_duplicate = await economic_service . check_invoice_number_exists ( document_number )
if economic_duplicate :
# DUPLICATE FOUND IN E-CONOMIC
logger . error ( f " 🚫 DUPLICATE: Invoice { document_number } found in e-conomic (Voucher # { economic_duplicate . get ( ' voucher_number ' ) } ) " )
# Mark file as duplicate
execute_update (
""" UPDATE incoming_files
SET status = ' duplicate ' ,
error_message = % s ,
processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( f " DUBLET: Fakturanummer { document_number } findes i e-conomic (Bilag # { economic_duplicate . get ( ' voucher_number ' ) } ) " ,
file_id )
)
raise HTTPException (
status_code = 409 , # 409 Conflict
detail = f " 🚫 DUBLET: Fakturanummer { document_number } findes i e-conomic (Bilag # { economic_duplicate . get ( ' voucher_number ' ) } , { economic_duplicate . get ( ' date ' ) } ) "
)
logger . info ( f " ✅ No duplicate found for invoice { document_number } " )
# Try template matching
logger . info ( f " 📋 Matching template... " )
template_id , confidence = template_service . match_template ( text )
extracted_fields = { }
vendor_id = None
if template_id and confidence > = 0.5 :
# Extract fields using template
logger . info ( f " ✅ Using template { template_id } ( { confidence : .0% } confidence) " )
extracted_fields = template_service . extract_fields ( text , template_id )
# Get vendor from template
template = template_service . templates_cache . get ( template_id )
if template :
vendor_id = template . get ( ' vendor_id ' )
# Save extraction to database
import json
extraction_id = execute_insert (
""" INSERT INTO extractions
( file_id , vendor_matched_id , document_id , document_date , due_date ,
total_amount , currency , document_type , confidence , llm_response_json , status )
VALUES ( % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , ' extracted ' ) """ ,
( file_id , vendor_id ,
extracted_fields . get ( ' invoice_number ' ) ,
extracted_fields . get ( ' invoice_date ' ) ,
extracted_fields . get ( ' due_date ' ) ,
extracted_fields . get ( ' total_amount ' ) ,
extracted_fields . get ( ' currency ' , ' DKK ' ) ,
extracted_fields . get ( ' document_type ' ) ,
confidence ,
json . dumps ( extracted_fields ) )
)
# Insert line items if extracted
if extracted_fields . get ( ' lines ' ) :
for idx , line in enumerate ( extracted_fields [ ' lines ' ] , start = 1 ) :
execute_insert (
""" INSERT INTO extraction_lines
( extraction_id , line_number , description , quantity , unit_price ,
line_total , vat_rate , vat_note , confidence )
VALUES ( % s , % s , % s , % s , % s , % s , % s , % s , % s )
RETURNING line_id """ ,
( extraction_id , idx , line . get ( ' description ' ) ,
line . get ( ' quantity ' ) , line . get ( ' unit_price ' ) ,
line . get ( ' line_total ' ) , line . get ( ' vat_rate ' ) ,
line . get ( ' vat_note ' ) , confidence )
)
# Log usage
template_service . log_usage ( template_id , file_id , True , confidence , extracted_fields )
# Update file record
execute_update (
""" UPDATE incoming_files
SET status = ' processed ' , template_id = % s , processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( template_id , file_id )
)
else :
# NO AI FALLBACK - Require template
logger . warning ( f " ⚠️ No template matched (confidence: { confidence : .0% } ) - rejecting file " )
execute_update (
""" UPDATE incoming_files
SET status = ' failed ' ,
error_message = ' Ingen template match - opret template for denne leverandør ' ,
processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( file_id , )
)
raise HTTPException (
status_code = 400 ,
detail = f " Ingen template match ( { confidence : .0% } confidence) - opret template for denne leverandør "
)
# Return data for user to review and confirm
return {
" status " : " needs_review " ,
" file_id " : file_id ,
" template_matched " : template_id is not None ,
" template_id " : template_id ,
" vendor_id " : vendor_id ,
" confidence " : confidence ,
" extracted_fields " : extracted_fields ,
" pdf_text " : text [ : 500 ] , # First 500 chars for reference
# Quick analysis results (available IMMEDIATELY on upload)
" quick_analysis " : {
" cvr " : quick_result . get ( ' cvr ' ) ,
" vendor_id " : quick_result . get ( ' vendor_id ' ) ,
" vendor_name " : quick_result . get ( ' vendor_name ' ) ,
" document_type " : quick_result . get ( ' document_type ' ) ,
" document_number " : quick_result . get ( ' document_number ' )
} ,
" message " : " Upload gennemført - gennemgå og bekræft data "
}
except HTTPException as he :
# Mark file as failed if we have file_id
if ' file_id ' in locals ( ) :
execute_update (
""" UPDATE incoming_files
SET status = ' failed ' ,
error_message = % s ,
processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( str ( he . detail ) , file_id )
)
raise
except Exception as e :
logger . error ( f " ❌ Upload failed (inner): { e } " , exc_info = True )
# Mark file as failed if we have file_id
if ' file_id ' in locals ( ) :
execute_update (
""" UPDATE incoming_files
SET status = ' failed ' ,
error_message = % s ,
processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( str ( e ) , file_id )
)
raise HTTPException ( status_code = 500 , detail = f " Upload fejlede: { str ( e ) } " )
except HTTPException :
raise
except Exception as e :
logger . error ( f " ❌ Upload failed (outer): { e } " , exc_info = True )
raise HTTPException ( status_code = 500 , detail = f " Upload fejlede: { str ( e ) } " )
# ========== ECONOMIC SYNC ==========
@router.post ( " /supplier-invoices/ {invoice_id} /send-to-economic " )
async def send_invoice_to_economic ( invoice_id : int ) :
""" Send supplier invoice to e-conomic - requires separate implementation """
raise HTTPException ( status_code = 501 , detail = " e-conomic integration kommer senere " )
@router.post ( " /supplier-invoices/reprocess/ {file_id} " )
async def reprocess_uploaded_file ( file_id : int ) :
"""
Reprocess an uploaded file using template matching
Used to handle files that failed or were never fully processed
"""
import json
from datetime import datetime , timedelta
try :
# Get file record
file_record = execute_query (
" SELECT * FROM incoming_files WHERE file_id = %s " ,
( file_id , ) ,
fetchone = True
)
if not file_record :
raise HTTPException ( status_code = 404 , detail = f " Fil { file_id } ikke fundet " )
file_path = Path ( file_record [ ' file_path ' ] )
if not file_path . exists ( ) :
raise HTTPException ( status_code = 404 , detail = f " Fil ikke fundet på disk: { file_path } " )
logger . info ( f " 🔄 Reprocessing file { file_id } : { file_record [ ' filename ' ] } " )
# Extract text from file
text = await ollama_service . _extract_text_from_file ( file_path )
# Try template matching
template_id , confidence = template_service . match_template ( text )
extracted_fields = { }
vendor_id = None
if template_id and confidence > = 0.5 :
logger . info ( f " ✅ Matched template { template_id } ( { confidence : .0% } ) " )
extracted_fields = template_service . extract_fields ( text , template_id )
# Check if this is an invoice2data template (ID -1)
is_invoice2data = ( template_id == - 1 )
if is_invoice2data :
# Invoice2data doesn't have vendor in cache
logger . info ( f " 📋 Using invoice2data template " )
# Try to find vendor from extracted CVR
if extracted_fields . get ( ' vendor_vat ' ) :
vendor = execute_query (
" SELECT id FROM vendors WHERE cvr_number = %s " ,
( extracted_fields [ ' vendor_vat ' ] , ) ,
fetchone = True
)
if vendor :
vendor_id = vendor [ ' id ' ]
# Store invoice2data extraction in database
extraction_id = execute_insert (
""" INSERT INTO extractions
( file_id , vendor_matched_id , vendor_name , vendor_cvr ,
document_id , document_date , due_date , document_type , document_type_detected ,
total_amount , currency , confidence , llm_response_json , status )
VALUES ( % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s )
RETURNING extraction_id """ ,
( file_id , vendor_id ,
extracted_fields . get ( ' issuer ' ) , # vendor_name
extracted_fields . get ( ' vendor_vat ' ) , # vendor_cvr
str ( extracted_fields . get ( ' invoice_number ' ) ) , # document_id
extracted_fields . get ( ' invoice_date ' ) , # document_date
extracted_fields . get ( ' due_date ' ) ,
' invoice ' , # document_type
' invoice ' , # document_type_detected
extracted_fields . get ( ' amount_total ' ) ,
extracted_fields . get ( ' currency ' , ' DKK ' ) ,
1.0 , # invoice2data always 100% confidence
json . dumps ( extracted_fields ) , # llm_response_json
' extracted ' ) # status
)
# Insert line items if extracted
if extracted_fields . get ( ' lines ' ) :
for idx , line in enumerate ( extracted_fields [ ' lines ' ] , start = 1 ) :
execute_insert (
""" INSERT INTO extraction_lines
( extraction_id , line_number , description , quantity , unit_price ,
line_total , vat_rate , vat_note , confidence ,
ip_address , contract_number , location_street , location_zip , location_city )
VALUES ( % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s , % s )
RETURNING line_id """ ,
( extraction_id , idx , line . get ( ' description ' ) ,
line . get ( ' quantity ' ) , line . get ( ' unit_price ' ) ,
line . get ( ' line_total ' ) , None , None , 1.0 ,
line . get ( ' ip_address ' ) , line . get ( ' contract_number ' ) ,
line . get ( ' location_street ' ) , line . get ( ' location_zip ' ) , line . get ( ' location_city ' ) )
)
logger . info ( f " ✅ Saved { len ( extracted_fields [ ' lines ' ] ) } line items " )
else :
# Custom template from database
template = template_service . templates_cache . get ( template_id )
if template :
vendor_id = template . get ( ' vendor_id ' )
template_service . log_usage ( template_id , file_id , True , confidence , extracted_fields )
# Update file - use NULL for invoice2data templates to avoid FK constraint
db_template_id = None if is_invoice2data else template_id
execute_update (
""" UPDATE incoming_files
SET status = ' processed ' , template_id = % s , processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( db_template_id , file_id )
)
else :
# FALLBACK TO AI EXTRACTION
logger . info ( f " ⚠️ Ingen template match (confidence: { confidence : .0% } ) - bruger AI extraction " )
# Use detected vendor from quick analysis if available
vendor_id = file_record . get ( ' detected_vendor_id ' )
# Call Ollama for full extraction
logger . info ( f " 🤖 Calling Ollama for AI extraction... " )
llm_result = await ollama_service . extract_from_text ( text )
if not llm_result or ' error ' in llm_result :
error_msg = llm_result . get ( ' error ' ) if llm_result else ' AI extraction fejlede '
logger . error ( f " ❌ AI extraction failed: { error_msg } " )
execute_update (
""" UPDATE incoming_files
SET status = ' failed ' ,
error_message = % s ,
processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( f " AI extraction fejlede: { error_msg } " , file_id )
)
raise HTTPException ( status_code = 500 , detail = f " AI extraction fejlede: { error_msg } " )
extracted_fields = llm_result
confidence = llm_result . get ( ' confidence ' , 0.75 )
# Store AI extracted data in extractions table
extraction_id = execute_insert (
""" INSERT INTO supplier_invoice_extractions
( file_id , vendor_id , invoice_number , invoice_date , due_date ,
total_amount , currency , document_type , confidence , llm_data )
VALUES ( % s , % s , % s , % s , % s , % s , % s , % s , % s , % s ) RETURNING extraction_id """ ,
( file_id , vendor_id ,
llm_result . get ( ' invoice_number ' ) ,
llm_result . get ( ' invoice_date ' ) ,
llm_result . get ( ' due_date ' ) ,
llm_result . get ( ' total_amount ' ) ,
llm_result . get ( ' currency ' , ' DKK ' ) ,
llm_result . get ( ' document_type ' ) ,
confidence ,
json . dumps ( llm_result ) )
)
# Insert line items if extracted
if llm_result . get ( ' lines ' ) :
for idx , line in enumerate ( llm_result [ ' lines ' ] , start = 1 ) :
execute_insert (
""" INSERT INTO extraction_lines
( extraction_id , line_number , description , quantity , unit_price ,
line_total , vat_rate , vat_note , confidence )
VALUES ( % s , % s , % s , % s , % s , % s , % s , % s , % s )
RETURNING line_id """ ,
( extraction_id , idx , line . get ( ' description ' ) ,
line . get ( ' quantity ' ) , line . get ( ' unit_price ' ) ,
line . get ( ' line_total ' ) , line . get ( ' vat_rate ' ) ,
line . get ( ' vat_note ' ) , confidence )
)
# Update file status to ai_extracted
execute_update (
""" UPDATE incoming_files
SET status = ' ai_extracted ' , processed_at = CURRENT_TIMESTAMP
WHERE file_id = % s """ ,
( file_id , )
)
logger . info ( f " ✅ AI extraction completed for file { file_id } " )
# Return success with template data or AI extraction result
result = {
" status " : " success " ,
" file_id " : file_id ,
" filename " : file_record [ ' filename ' ] ,
" template_matched " : template_id is not None ,
" template_id " : template_id ,
" vendor_id " : vendor_id ,
" confidence " : confidence if template_id else llm_result . get ( ' confidence ' , 0.75 ) ,
" extracted_fields " : extracted_fields ,
" pdf_text " : text [ : 1000 ] if not template_id else text
}
# Add warning if no template exists
if not template_id and vendor_id :
vendor = execute_query (
" SELECT name FROM vendors WHERE id = %s " ,
( vendor_id , ) ,
fetchone = True
)
if vendor :
result [ " warning " ] = f " ⚠️ Ingen template fundet for { vendor [ ' name ' ] } - brugte AI extraction (langsommere) "
return result
except HTTPException :
raise
except Exception as e :
raise HTTPException ( status_code = 500 , detail = f " Genbehandling fejlede: { str ( e ) } " )
@router.put ( " /supplier-invoices/templates/ {template_id} " )
async def update_template (
template_id : int ,
template_name : Optional [ str ] = None ,
detection_patterns : Optional [ List [ Dict ] ] = None ,
field_mappings : Optional [ Dict ] = None ,
default_product_category : Optional [ str ] = None ,
is_active : Optional [ bool ] = None
) :
""" Opdater eksisterende template """
try :
import json
updates = [ ]
params = [ ]
if template_name :
updates . append ( " template_name = %s " )
params . append ( template_name )
if detection_patterns is not None :
updates . append ( " detection_patterns = %s " )
params . append ( json . dumps ( detection_patterns ) )
if field_mappings is not None :
updates . append ( " field_mappings = %s " )
params . append ( json . dumps ( field_mappings ) )
if default_product_category is not None :
updates . append ( " default_product_category = %s " )
params . append ( default_product_category )
if is_active is not None :
updates . append ( " is_active = %s " )
params . append ( is_active )
if not updates :
raise HTTPException ( status_code = 400 , detail = " Ingen opdateringer angivet " )
updates . append ( " updated_at = CURRENT_TIMESTAMP " )
params . append ( template_id )
execute_update (
f " UPDATE supplier_invoice_templates SET { ' , ' . join ( updates ) } WHERE template_id = %s " ,
tuple ( params )
)
# Reload templates
template_service . reload_templates ( )
logger . info ( f " ✅ Template { template_id } opdateret " )
return { " message " : " Template opdateret " }
except HTTPException :
raise
except Exception as e :
logger . error ( f " ❌ Failed to update template: { e } " )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
@router.post ( " /supplier-invoices/templates/invoice2data/ {template_name} /test " )
async def test_invoice2data_template ( template_name : str , request : Dict ) :
"""
Test an invoice2data YAML template against PDF text
Request body :
{
" pdf_text " : " Full PDF text content... "
}
Returns the same format as the test_template endpoint
"""
try :
pdf_text = request . get ( ' pdf_text ' , ' ' )
if not pdf_text :
raise HTTPException ( status_code = 400 , detail = " pdf_text er påkrævet " )
# Get invoice2data service
invoice2data_service = get_invoice2data_service ( )
# Check if template exists
if template_name not in invoice2data_service . templates :
raise HTTPException ( status_code = 404 , detail = f " Template ' { template_name } ' ikke fundet " )
template_data = invoice2data_service . templates [ template_name ]
# Test extraction
result = invoice2data_service . extract_with_template ( pdf_text , template_name )
if not result :
# Template didn't match
keywords = template_data . get ( ' keywords ' , [ ] )
detection_results = [ ]
for keyword in keywords :
found = str ( keyword ) . lower ( ) in pdf_text . lower ( )
detection_results . append ( {
" pattern " : str ( keyword ) ,
" type " : " keyword " ,
" found " : found ,
" weight " : 0.5
} )
return {
" matched " : False ,
" confidence " : 0.0 ,
" extracted_fields " : { } ,
" line_items " : [ ] ,
" detection_results " : detection_results ,
" template_name " : template_name ,
" error " : " Template matchede ikke PDF ' en "
}
# Extract line items
line_items = [ ]
if ' lines ' in result :
for line in result [ ' lines ' ] :
line_items . append ( {
" line_number " : line . get ( ' line_number ' , ' ' ) ,
" item_number " : line . get ( ' item_number ' , ' ' ) ,
" description " : line . get ( ' description_raw ' , ' ' ) or line . get ( ' description ' , ' ' ) ,
" quantity " : line . get ( ' quantity ' , ' ' ) ,
" unit_price " : line . get ( ' unit_price ' , ' ' ) ,
" line_total " : line . get ( ' line_total ' , ' ' ) ,
# Context fields (circuit/location info)
" circuit_id " : line . get ( ' circuit_id ' , ' ' ) ,
" ip_address " : line . get ( ' ip_address ' , ' ' ) ,
" contract_number " : line . get ( ' contract_number ' , ' ' ) ,
" location_street " : line . get ( ' location_street ' , ' ' ) ,
" location_zip " : line . get ( ' location_zip ' , ' ' ) ,
" location_city " : line . get ( ' location_city ' , ' ' ) ,
} )
# Build detection results
keywords = template_data . get ( ' keywords ' , [ ] )
detection_results = [ ]
matched_count = 0
for keyword in keywords :
found = str ( keyword ) . lower ( ) in pdf_text . lower ( )
if found :
matched_count + = 1
detection_results . append ( {
" pattern " : str ( keyword ) ,
" type " : " keyword " ,
" found " : found ,
" weight " : 0.5
} )
confidence = matched_count / len ( keywords ) if keywords else 1.0
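# e.g. 3 of 4 keywords found -> confidence 0.75; a template without keywords counts as 1.0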
# Remove 'lines' from extracted_fields to avoid duplication
extracted_fields = { k : v for k , v in result . items ( ) if k != ' lines ' }
return {
" matched " : True ,
" confidence " : confidence ,
" extracted_fields " : extracted_fields ,
" line_items " : line_items ,
" detection_results " : detection_results ,
" template_name " : template_name
}
except HTTPException :
raise
except Exception as e :
logger . error ( f " ❌ Invoice2data template test failed: { e } " , exc_info = True )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
@router.post ( " /supplier-invoices/templates/ {template_id} /test " )
async def test_template ( template_id : int , request : Dict ) :
"""
Test a template against PDF text
Request body :
{
" pdf_text " : " Full PDF text content... "
}
Returns :
{
" matched " : true / false ,
" confidence " : 0.85 ,
" extracted_fields " : {
" invoice_number " : " 12345 " ,
" invoice_date " : " 01/12-25 " ,
" total_amount " : " 1234.56 " ,
" vendor_cvr " : " 12345678 "
} ,
" detection_results " : [
{ " pattern " : " BMC Denmark ApS " , " found " : true , " weight " : 0.5 }
]
}
"""
try :
import re
import json
pdf_text = request . get ( ' pdf_text ' , ' ' )
if not pdf_text :
raise HTTPException ( status_code = 400 , detail = " pdf_text er påkrævet " )
# Fetch template
query = " SELECT * FROM supplier_invoice_templates WHERE template_id = %s "
template = execute_query ( query , ( template_id , ) )
if not template :
raise HTTPException ( status_code = 404 , detail = " Template ikke fundet " )
template = template [ 0 ]
detection_patterns = template . get ( ' detection_patterns ' , [ ] )
field_mappings = template . get ( ' field_mappings ' , { } )
# Test detection patterns
total_score = 0.0
max_score = 0.0
detection_results = [ ]
for pattern in detection_patterns :
pattern_type = pattern . get ( ' type ' , ' text ' )
pattern_value = pattern . get ( ' pattern ' , ' ' )
weight = float ( pattern . get ( ' weight ' , 0.5 ) )
max_score + = weight
found = False
if pattern_type == ' text ' and pattern_value in pdf_text :
found = True
total_score + = weight
detection_results . append ( {
" pattern " : pattern_value ,
" type " : pattern_type ,
" found " : found ,
" weight " : weight
} )
confidence = ( total_score / max_score ) if max_score > 0 else 0.0
matched = confidence > = 0.7 # Match threshold
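# e.g. patterns with weights 0.5 + 0.3 found out of a possible 1.0 -> confidence 0.8 >= 0.7 -> matched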
# Extract fields if matched
extracted_fields = { }
if matched :
for field_name , field_config in field_mappings . items ( ) :
pattern = field_config . get ( ' pattern ' , ' ' )
group = field_config . get ( ' group ' , 1 )
# Skip non-field patterns (lines_start, lines_end, line_item)
if field_name in [ ' lines_start ' , ' lines_end ' , ' line_item ' ] :
continue
try :
match = re . search ( pattern , pdf_text , re . IGNORECASE | re . MULTILINE )
if match and len ( match . groups ( ) ) > = group :
extracted_fields [ field_name ] = match . group ( group ) . strip ( )
except Exception as e :
logger . warning ( f " Pattern match failed for { field_name } : { e } " )
# Extract line items if matched
line_items = [ ]
if matched :
# Extract line items using smart extraction
lines_start = field_mappings . get ( ' lines_start ' , { } ) . get ( ' pattern ' )
lines_end = field_mappings . get ( ' lines_end ' , { } ) . get ( ' pattern ' )
line_pattern = field_mappings . get ( ' line_item ' , { } ) . get ( ' pattern ' )
line_fields = field_mappings . get ( ' line_item ' , { } ) . get ( ' fields ' , [ ] )
if line_pattern or lines_start :
# Extract section between start and end markers
text_section = pdf_text
if lines_start :
try :
start_match = re . search ( lines_start , pdf_text , re . IGNORECASE )
if start_match :
text_section = pdf_text [ start_match . end ( ) : ]
logger . debug ( f " Found lines_start at position { start_match . end ( ) } " )
except Exception as e :
logger . warning ( f " Failed to find lines_start: { e } " )
if lines_end :
try :
end_match = re . search ( lines_end , text_section , re . IGNORECASE )
if end_match :
text_section = text_section [ : end_match . start ( ) ]
logger . debug ( f " Found lines_end at position { end_match . start ( ) } " )
except Exception as e :
logger . warning ( f " Failed to find lines_end: { e } " )
# Try pattern first, then smart extraction
if line_pattern :
try :
for match in re . finditer ( line_pattern , text_section , re . MULTILINE ) :
line_data = {
' line_number ' : len ( line_items ) + 1 ,
' raw_text ' : match . group ( 0 )
}
for idx , field_name in enumerate ( line_fields , start = 1 ) :
if idx < = len ( match . groups ( ) ) :
line_data [ field_name ] = match . group ( idx ) . strip ( )
line_items . append ( line_data )
except Exception as e :
logger . error ( f " ❌ Pattern extraction failed: { e } " )
# Fallback to smart extraction if no lines found
if not line_items :
logger . info ( " 🧠 Trying smart extraction... " )
logger . debug ( f " Text section length: { len ( text_section ) } , first 500 chars: { text_section [ : 500 ] } " )
line_items = _smart_extract_lines ( text_section )
logger . info ( f " 🧠 Smart extraction returned { len ( line_items ) } items " )
if line_items :
logger . info ( f " 📦 Extracted { len ( line_items ) } line items from test " )
else :
logger . warning ( f " ⚠️ No line items matched. Section length: { len ( text_section ) } chars " )
logger . debug ( f " Section preview: { text_section [ : 300 ] } " )
logger . info ( f " 🧪 Template { template_id } test: matched= { matched } , confidence= { confidence : .2f } , lines= { len ( line_items ) } " )
return {
" matched " : matched ,
" confidence " : round ( confidence , 2 ) ,
" extracted_fields " : extracted_fields ,
" line_items " : line_items ,
" detection_results " : detection_results ,
" template_name " : template . get ( ' template_name ' , ' ' )
}
except HTTPException :
raise
except Exception as e :
logger . error ( f " ❌ Template test failed: { e } " , exc_info = True )
raise HTTPException ( status_code = 500 , detail = str ( e ) )
@router.put("/supplier-invoices/templates/invoice2data/{template_name}/category")
async def update_yaml_category(template_name: str, request: Dict):
    """
    Update default_product_category in a YAML template file.

    Request body:
    {
        "category": "drift"   // varesalg, drift, anlæg, abonnement, lager, udlejning
    }
    """
    try:
        import yaml
        from pathlib import Path

        new_category = request.get('category')
        if not new_category:
            raise HTTPException(status_code=400, detail="category er påkrævet")

        # Validate category
        valid_categories = ['varesalg', 'drift', 'anlæg', 'abonnement', 'lager', 'udlejning']
        if new_category not in valid_categories:
            raise HTTPException(status_code=400, detail=f"Ugyldig kategori. Skal være en af: {', '.join(valid_categories)}")

        # Find YAML file
        templates_dir = Path(__file__).parent.parent.parent.parent / 'data' / 'invoice_templates'
        yaml_file = templates_dir / f"{template_name}.yml"
        if not yaml_file.exists():
            raise HTTPException(status_code=404, detail=f"YAML fil ikke fundet: {template_name}.yml")

        # Load YAML
        with open(yaml_file, 'r', encoding='utf-8') as f:
            template_data = yaml.safe_load(f)

        # Update category
        template_data['default_product_category'] = new_category

        # Save YAML (note: yaml.dump rewrites the file and does not preserve comments or original formatting)
        with open(yaml_file, 'w', encoding='utf-8') as f:
            yaml.dump(template_data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

        # Reload invoice2data service to pick up changes
        invoice2data_service = get_invoice2data_service()
        invoice2data_service.__init__()  # Reinitialize to reload templates

        logger.info(f"✅ Updated category for {template_name}.yml to {new_category}")
        return {
            "message": "Kategori opdateret",
            "template_name": template_name,
            "new_category": new_category
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to update YAML category: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
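# Illustrative usage sketch for the endpoint above. Host, port and the /api prefix are
# assumptions about how this router is mounted; "dcs_aps" is a hypothetical template
# file name under data/invoice_templates/. Adjust to the actual deployment.
#
#   curl -X PUT "http://localhost:8000/api/supplier-invoices/templates/invoice2data/dcs_aps/category" \
#        -H "Content-Type: application/json" \
#        -d '{"category": "drift"}'
#
# On success the endpoint rewrites default_product_category in dcs_aps.yml and reloads
# the invoice2data service so the new category takes effect immediately.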
@router.get("/supplier-invoices/templates/invoice2data/{template_name}/content")
async def get_yaml_content(template_name: str):
    """
    Return the raw YAML content of a template file.

    Returns:
    {
        "content": "issuer: DCS ApS\\nkeywords: ..."
    }
    """
    try:
        from pathlib import Path

        # Find template file
        # NOTE: resolved relative to the process working directory (unlike
        # update_yaml_category above, which resolves the path from __file__)
        template_dir = Path("data/invoice_templates")
        template_file = template_dir / f"{template_name}.yml"
        if not template_file.exists():
            raise HTTPException(status_code=404, detail=f"Template fil ikke fundet: {template_name}.yml")

        # Read file content
        content = template_file.read_text(encoding='utf-8')

        return {
            "template_name": template_name,
            "filename": f"{template_name}.yml",
            "content": content
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to read YAML content: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
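# Illustrative sketch: example call and response shape for the content endpoint above.
# Host, port, the /api prefix and the "dcs_aps" template name are assumptions.
#
#   curl "http://localhost:8000/api/supplier-invoices/templates/invoice2data/dcs_aps/content"
#
#   {
#     "template_name": "dcs_aps",
#     "filename": "dcs_aps.yml",
#     "content": "issuer: DCS ApS\nkeywords: ..."
#   }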
@router.delete("/supplier-invoices/templates/{template_id}")
async def delete_template(template_id: int):
    """Delete a template (soft delete - sets is_active=false)"""
    try:
        execute_update(
            "UPDATE supplier_invoice_templates SET is_active = false WHERE template_id = %s",
            (template_id,)
        )
        template_service.reload_templates()
        logger.info(f"✅ Template {template_id} deactivated")
        return {"message": "Template slettet"}
    except Exception as e:
        logger.error(f"❌ Failed to delete template: {e}")
        raise HTTPException(status_code=500, detail=str(e))
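# Illustrative usage sketch for the soft delete above (host/port and /api prefix are
# assumptions about how the router is mounted):
#
#   curl -X DELETE "http://localhost:8000/api/supplier-invoices/templates/42"
#
# The row stays in supplier_invoice_templates with is_active = false, so a template
# can presumably be restored later by setting the flag back to true in the database.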