bmc_hub/migrations/013_email_system.sql
Christian 8791e34f4e feat: Implement email processing system with scheduler, fetching, classification, and rule matching
- Added EmailProcessorService to orchestrate email workflow: fetching, saving, classifying, and matching rules.
- Introduced EmailScheduler for background processing of emails every 5 minutes.
- Developed EmailService to handle email fetching from IMAP and Microsoft Graph API.
- Created database migration for email system, including tables for email messages, rules, attachments, and analysis.
- Implemented AI classification and extraction for invoices and time confirmations.
- Added logging for better traceability and error handling throughout the email processing pipeline.
2025-12-11 02:31:29 +01:00

245 lines
8.0 KiB
PL/PgSQL

-- Migration 013: Email System for Invoice and Time Confirmation Processing
-- Based on OmniSync email architecture adapted for BMC Hub
-- Drop existing tables if any (clean slate)
DROP TABLE IF EXISTS email_analysis CASCADE;
DROP TABLE IF EXISTS email_attachments CASCADE;
DROP TABLE IF EXISTS email_messages CASCADE;
DROP TABLE IF EXISTS email_rules CASCADE;
DROP VIEW IF EXISTS v_unprocessed_emails;
DROP VIEW IF EXISTS v_email_activity;
DROP FUNCTION IF EXISTS update_email_messages_updated_at();
DROP FUNCTION IF EXISTS update_email_rules_updated_at();
-- Email Rules Table (create first - referenced by email_messages)
CREATE TABLE email_rules (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description TEXT,
-- Rule Conditions (JSON for flexibility)
conditions JSONB NOT NULL,
-- Rule Actions
action_type VARCHAR(50) NOT NULL,
action_params JSONB,
-- Priority and Status
priority INTEGER DEFAULT 100,
enabled BOOLEAN DEFAULT true,
-- Statistics
match_count INTEGER DEFAULT 0,
last_matched_at TIMESTAMP,
-- Audit
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_by_user_id INTEGER,
FOREIGN KEY (created_by_user_id) REFERENCES users(id) ON DELETE SET NULL
);
-- Email Messages Table (main storage)
CREATE TABLE email_messages (
id SERIAL PRIMARY KEY,
message_id VARCHAR(500) UNIQUE NOT NULL,
subject TEXT,
sender_email VARCHAR(255),
sender_name VARCHAR(255),
recipient_email VARCHAR(255),
cc TEXT,
body_text TEXT,
body_html TEXT,
received_date TIMESTAMP,
folder VARCHAR(100) DEFAULT 'INBOX',
-- AI Classification
classification VARCHAR(50),
confidence_score DECIMAL(3,2),
classification_date TIMESTAMP,
-- Rule Matching
rule_id INTEGER,
auto_processed BOOLEAN DEFAULT false,
-- Linking
supplier_id INTEGER,
customer_id INTEGER,
linked_case_id INTEGER,
linked_time_entry_id INTEGER,
linked_purchase_id INTEGER,
-- Metadata
has_attachments BOOLEAN DEFAULT false,
attachment_count INTEGER DEFAULT 0,
is_read BOOLEAN DEFAULT false,
status VARCHAR(50) DEFAULT 'new',
approval_status VARCHAR(50),
-- Extraction Fields
extracted_invoice_number VARCHAR(100),
extracted_order_number VARCHAR(100),
extracted_tracking_number VARCHAR(100),
extracted_amount DECIMAL(15,2),
extracted_due_date DATE,
-- Audit
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
processed_at TIMESTAMP,
processed_by_user_id INTEGER,
-- Soft Delete
deleted_at TIMESTAMP,
deleted_by_user_id INTEGER
);
-- Email Attachments Table
CREATE TABLE email_attachments (
id SERIAL PRIMARY KEY,
email_id INTEGER NOT NULL,
filename VARCHAR(255) NOT NULL,
content_type VARCHAR(100),
size_bytes INTEGER,
file_path TEXT, -- Path in filesystem or object storage
-- Extraction Status
extracted BOOLEAN DEFAULT false,
extraction_error TEXT,
-- Metadata
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (email_id) REFERENCES email_messages(id) ON DELETE CASCADE
);
-- Email Analysis Cache (for AI classifications)
CREATE TABLE email_analysis (
id SERIAL PRIMARY KEY,
email_id INTEGER NOT NULL,
analysis_type VARCHAR(50) NOT NULL, -- classification, extraction, summary
-- AI Results
result_json JSONB,
confidence_score DECIMAL(3,2),
model_used VARCHAR(100),
-- Performance
processing_time_ms INTEGER,
-- Metadata
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (email_id) REFERENCES email_messages(id) ON DELETE CASCADE,
UNIQUE(email_id, analysis_type)
);
-- Indexes for Performance
CREATE INDEX idx_email_messages_sender ON email_messages(sender_email) WHERE deleted_at IS NULL;
CREATE INDEX idx_email_messages_classification ON email_messages(classification) WHERE deleted_at IS NULL;
CREATE INDEX idx_email_messages_status ON email_messages(status) WHERE deleted_at IS NULL;
CREATE INDEX idx_email_messages_received_date ON email_messages(received_date DESC) WHERE deleted_at IS NULL;
CREATE INDEX idx_email_messages_message_id ON email_messages(message_id);
CREATE INDEX idx_email_messages_supplier ON email_messages(supplier_id) WHERE supplier_id IS NOT NULL;
CREATE INDEX idx_email_messages_customer ON email_messages(customer_id) WHERE customer_id IS NOT NULL;
CREATE INDEX idx_email_messages_linked_case ON email_messages(linked_case_id) WHERE linked_case_id IS NOT NULL;
CREATE INDEX idx_email_attachments_email_id ON email_attachments(email_id);
CREATE INDEX idx_email_analysis_email_id ON email_analysis(email_id);
CREATE INDEX idx_email_rules_priority ON email_rules(priority) WHERE enabled = true;
-- Update Trigger for email_messages
CREATE OR REPLACE FUNCTION update_email_messages_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_email_messages_updated_at
BEFORE UPDATE ON email_messages
FOR EACH ROW
EXECUTE FUNCTION update_email_messages_updated_at();
-- Update Trigger for email_rules
CREATE OR REPLACE FUNCTION update_email_rules_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_email_rules_updated_at
BEFORE UPDATE ON email_rules
FOR EACH ROW
EXECUTE FUNCTION update_email_rules_updated_at();
-- View for unprocessed emails
CREATE OR REPLACE VIEW v_unprocessed_emails AS
SELECT
em.*,
COUNT(ea.id) as attachment_count_actual,
er.name as rule_name,
v.name as supplier_name,
tc.customer_name,
tcase.title as case_title
FROM email_messages em
LEFT JOIN email_attachments ea ON em.id = ea.email_id
LEFT JOIN email_rules er ON em.rule_id = er.id
LEFT JOIN vendors v ON em.supplier_id = v.id
LEFT JOIN tmodule_customers tc ON em.customer_id = tc.id
LEFT JOIN tmodule_cases tcase ON em.linked_case_id = tcase.id
WHERE em.deleted_at IS NULL
AND em.status IN ('new', 'error')
GROUP BY em.id, er.name, v.name, tc.customer_name, tcase.title
ORDER BY em.received_date DESC;
-- View for recent email activity
CREATE OR REPLACE VIEW v_email_activity AS
SELECT
DATE(em.received_date) as activity_date,
em.classification,
COUNT(*) as email_count,
COUNT(CASE WHEN em.auto_processed THEN 1 END) as auto_processed_count,
AVG(em.confidence_score) as avg_confidence
FROM email_messages em
WHERE em.deleted_at IS NULL
AND em.received_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY DATE(em.received_date), em.classification
ORDER BY activity_date DESC, email_count DESC;
-- Sample email rules for common scenarios
INSERT INTO email_rules (name, description, conditions, action_type, action_params, priority, created_by_user_id)
VALUES
('Mark Spam - Known Domains',
'Automatically mark emails from known spam domains as spam',
'{"sender_domain": ["spamsite.com", "marketing-spam.net"], "action": "mark_spam"}',
'mark_spam',
'{}',
10,
1),
('Link Supplier Invoices',
'Automatically link invoices from known supplier email domains',
'{"classification": "invoice", "sender_domain_match": "supplier"}',
'link_supplier',
'{"auto_match_domain": true}',
50,
1),
('Time Confirmation Auto-Link',
'Link time confirmation emails to cases based on case number in subject',
'{"classification": "time_confirmation", "subject_regex": "CC[0-9]{4}"}',
'link_case',
'{"extract_case_from_subject": true}',
30,
1)
ON CONFLICT DO NOTHING;
COMMENT ON TABLE email_messages IS 'Main email storage with AI classification and linking';
COMMENT ON TABLE email_attachments IS 'Email attachment metadata and file references';
COMMENT ON TABLE email_analysis IS 'Cache for AI analysis results (classification, extraction)';
COMMENT ON TABLE email_rules IS 'Automatic email processing rules with priority matching';