bmc_hub/migrations/157_fix_thread_keys_multi_thread.sql

58 lines
2.5 KiB
MySQL
Raw Normal View History

-- Migration 157: Fix thread_keys - restore correct per-conversation grouping
-- Migration 156 Step 2 incorrectly forced ALL emails in a SAG to share one thread_key.
-- This migration restores the correct thread_key based on actual email conversation headers.
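-- Step 1: Restore thread_key for emails that have a Graph conversationId stored
-- (these were overwritten by the dominant-thread backfill).
-- The conversationId is the most reliable conversation identifier from Exchange/Graph.
-- NOTE(review): no statement in this file appears to perform this restore; confirm
-- whether Step 1 is handled elsewhere (e.g. in application code) or was dropped.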
-- Step 2: Re-derive thread_keys from the actual email headers, undoing the
-- forced unification for every non-deleted email whose thread can be resolved.
--
-- Resolution order per email:
--   thread root (References[0]) > direct parent (In-Reply-To) > own message_id
--
-- Statement order matters. Conversation starters are reset FIRST so that the
-- replies processed afterwards inherit the corrected key. Propagating before the
-- reset would only copy the unified key from migration 156 onto rows that
-- already carry it.
--
-- Conventions used by all three statements:
--   * A message id is normalized by removing '<', '>' and whitespace, then lowercasing.
--   * The first id of a header is taken with SUBSTRING(col FROM E'[^\\s,]+'),
--     which yields NULL for NULL/blank headers and ignores leading whitespace.
--   * Regex literals are written as E'...' with doubled backslashes so they do
--     not depend on the standard_conforming_strings setting.
--   * "thread_key IS DISTINCT FROM <new value>" skips rows that are already
--     correct, so updated_at is not bumped for rows that do not change.
--
-- NOTE(review): replies whose parent/root message is not present in
-- email_messages are not touched here and keep their current thread_key.

-- Step 2a: conversation starters (no usable In-Reply-To, no usable References)
-- start their own thread, keyed by their own normalized message_id.
UPDATE email_messages
SET
    thread_key = LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g')),
    updated_at = CURRENT_TIMESTAMP
WHERE deleted_at IS NULL
    AND SUBSTRING(in_reply_to FROM E'[^\\s,]+') IS NULL
    AND SUBSTRING(email_references FROM E'[^\\s,]+') IS NULL
    AND message_id IS NOT NULL
    -- Ignore ids such as '<>' that normalize to an empty string.
    AND LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g')) != ''
    AND thread_key IS DISTINCT FROM LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g'));

-- Step 2b: replies adopt the thread_key of the thread root, i.e. the email whose
-- message_id equals the first entry of the reply's References header.
WITH parent_keys AS (
    -- Exactly one thread_key per normalized message_id. UPDATE ... FROM uses an
    -- unpredictable row when a target row joins to several source rows, so
    -- duplicate message_ids are collapsed deterministically with MIN().
    SELECT
        LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g')) AS normalized_message_id,
        MIN(thread_key) AS thread_key
    FROM email_messages
    WHERE deleted_at IS NULL
        AND thread_key IS NOT NULL
        AND TRIM(thread_key) != ''
        AND LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g')) != ''
    GROUP BY LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g'))
)
UPDATE email_messages AS child
SET
    thread_key = root.thread_key,
    updated_at = CURRENT_TIMESTAMP
FROM parent_keys AS root
WHERE child.deleted_at IS NULL
    AND root.normalized_message_id = LOWER(REGEXP_REPLACE(
        SUBSTRING(child.email_references FROM E'[^\\s,]+'),
        E'[<>\\s]', '', 'g'
    ))
    AND child.thread_key IS DISTINCT FROM root.thread_key;

-- Step 2c: replies whose thread root could not be resolved via References fall
-- back to the thread_key of their direct parent, matched via In-Reply-To.
-- Running after Step 2b means the parent's key already reflects Steps 2a/2b.
WITH parent_keys AS (
    -- Same one-row-per-message_id view as in Step 2b, re-evaluated so it sees
    -- the keys written by the previous statements.
    SELECT
        LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g')) AS normalized_message_id,
        MIN(thread_key) AS thread_key
    FROM email_messages
    WHERE deleted_at IS NULL
        AND thread_key IS NOT NULL
        AND TRIM(thread_key) != ''
        AND LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g')) != ''
    GROUP BY LOWER(REGEXP_REPLACE(message_id, E'[<>\\s]', '', 'g'))
)
UPDATE email_messages AS child
SET
    thread_key = parent.thread_key,
    updated_at = CURRENT_TIMESTAMP
FROM parent_keys AS parent
WHERE child.deleted_at IS NULL
    AND parent.normalized_message_id = LOWER(REGEXP_REPLACE(
        SUBSTRING(child.in_reply_to FROM E'[^\\s,]+'),
        E'[<>\\s]', '', 'g'
    ))
    -- Rows already resolved through their References root keep that result.
    AND NOT EXISTS (
        SELECT 1
        FROM parent_keys AS root
        WHERE root.normalized_message_id = LOWER(REGEXP_REPLACE(
            SUBSTRING(child.email_references FROM E'[^\\s,]+'),
            E'[<>\\s]', '', 'g'
        ))
    )
    AND child.thread_key IS DISTINCT FROM parent.thread_key;