-- Migration 157: Fix thread_keys - restore correct per-conversation grouping
--
-- Migration 156 Step 2 incorrectly forced ALL emails in a SAG to share one
-- thread_key. This migration restores the thread_key from the actual email
-- conversation headers.
--
-- Step 1: Restore thread_key for emails that have a Graph conversationId
-- stored (these were overwritten by the dominant-thread backfill). The
-- conversationId is the most reliable conversation identifier from
-- Exchange/Graph.
-- NOTE(review): no statement in this section implements Step 1 - confirm
-- whether it lives elsewhere or was intentionally dropped.
--
-- Step 2: Re-derive thread_keys from actual email headers.
-- Priority: conversationId (if provider) > parent's thread_key >
--           References[0] > In-Reply-To > message_id
-- We re-derive for ALL emails to undo the forced unification.
-- NOTE(review): only the "parent's thread_key" and "message_id" rules are
-- implemented by the statements visible here.
--
-- Replies (rows with in_reply_to or email_references) adopt the thread_key of
-- the conversation they ACTUALLY belong to, found by following message_id
-- links rather than by taking any email in the SAG.
--
-- Why this is a recursive query instead of a plain self-join:
--   * An UPDATE reads the table as of statement start, so a one-pass
--     "child := parent.thread_key" copies the parent's OLD (unified) key and
--     never reaches grandparents.
--   * Conversation starters are reset to their own normalised message_id by
--     the statement that follows this one, so a starter's key is computed
--     here with the same expression instead of being read from the table.
--     The result is therefore independent of statement order.
--   * UPDATE ... FROM picks an unpredictable row when a target joins to more
--     than one source row; the final key is chosen per message with MIN() so
--     reruns are deterministic.
--
-- Rows whose key is already correct are skipped (updated_at is not bumped).
-- Replies without a usable message_id cannot be addressed by this statement
-- and keep their current thread_key.
WITH RECURSIVE message_links AS (
    -- One row per live message with its identifiers normalised
    -- (angle brackets / whitespace stripped, lower-cased). Ids that
    -- normalise to '' become NULL so they never match each other.
    SELECT
        NULLIF(LOWER(REGEXP_REPLACE(message_id, '[<>\s]', '', 'g')), '') AS message_key,
        NULLIF(LOWER(REGEXP_REPLACE(
            (REGEXP_SPLIT_TO_ARRAY(TRIM(in_reply_to), E'[\\s,]+'))[1],
            '[<>\s]', '', 'g'
        )), '') AS reply_parent_key,
        NULLIF(LOWER(REGEXP_REPLACE(
            (REGEXP_SPLIT_TO_ARRAY(TRIM(email_references), E'[\\s,]+'))[1],
            '[<>\s]', '', 'g'
        )), '') AS reference_parent_key,
        (in_reply_to IS NULL OR TRIM(in_reply_to) = '')
            AND (email_references IS NULL OR TRIM(email_references) = '') AS is_starter,
        CAST(thread_key AS text) AS thread_key
    FROM email_messages
    WHERE deleted_at IS NULL
),
reply_edges AS (
    -- child -> parent links via In-Reply-To and via the first References
    -- entry, as a plain edge list so the joins below are simple equalities.
    SELECT
        message_key AS child_key,
        reply_parent_key AS parent_key
    FROM message_links
    WHERE message_key IS NOT NULL
      AND reply_parent_key IS NOT NULL
      AND message_key <> reply_parent_key
    UNION
    SELECT
        message_key AS child_key,
        reference_parent_key AS parent_key
    FROM message_links
    WHERE message_key IS NOT NULL
      AND reference_parent_key IS NOT NULL
      AND message_key <> reference_parent_key
),
thread_roots AS (
    -- Conversation starters: their key is their own normalised message_id.
    SELECT
        message_key,
        message_key AS thread_key
    FROM message_links
    WHERE is_starter
      AND message_key IS NOT NULL
    UNION
    -- Replies whose parent is not stored (or is soft-deleted): nothing to
    -- adopt, so they anchor their sub-thread with the key they already have.
    SELECT
        orphan.message_key,
        orphan.thread_key
    FROM message_links AS orphan
    WHERE NOT orphan.is_starter
      AND orphan.message_key IS NOT NULL
      AND orphan.thread_key IS NOT NULL
      AND TRIM(orphan.thread_key) != ''
      AND NOT EXISTS (
          SELECT 1
          FROM reply_edges AS edge
          INNER JOIN message_links AS parent
              ON parent.message_key = edge.parent_key
          WHERE edge.child_key = orphan.message_key
      )
),
thread_assignment (message_key, thread_key) AS (
    SELECT
        message_key,
        thread_key
    FROM thread_roots
    -- UNION (not UNION ALL) discards already-seen pairs, which guarantees
    -- termination even when malformed headers form a reply cycle.
    UNION
    SELECT
        edge.child_key,
        assigned.thread_key
    FROM thread_assignment AS assigned
    INNER JOIN reply_edges AS edge
        ON edge.parent_key = assigned.message_key
),
thread_by_message AS (
    -- Exactly one key per message, chosen deterministically.
    SELECT
        message_key,
        MIN(thread_key) AS thread_key
    FROM thread_assignment
    GROUP BY message_key
)
UPDATE email_messages AS child
SET
    thread_key = resolved.thread_key,
    updated_at = CURRENT_TIMESTAMP
FROM thread_by_message AS resolved
WHERE child.deleted_at IS NULL
  -- replies only; starters are handled by the next statement
  AND NOT (
      (child.in_reply_to IS NULL OR TRIM(child.in_reply_to) = '')
      AND (child.email_references IS NULL OR TRIM(child.email_references) = '')
  )
  AND NULLIF(LOWER(REGEXP_REPLACE(child.message_id, '[<>\s]', '', 'g')), '') = resolved.message_key
  AND child.thread_key IS DISTINCT FROM resolved.thread_key;

-- For emails that are conversation starters (no in_reply_to, no references),
-- reset thread_key to their own message_id so they start their own thread.
-- Only live rows with blank In-Reply-To AND blank References are touched.
-- The new key is the message_id with angle brackets and whitespace removed,
-- lower-cased. Rows whose message_id normalises to an empty string
-- (e.g. '<>') are skipped so that no row is ever given an empty thread_key.
UPDATE email_messages
SET
    thread_key = LOWER(REGEXP_REPLACE(message_id, '[<>\s]', '', 'g')),
    updated_at = CURRENT_TIMESTAMP
WHERE deleted_at IS NULL
  AND (in_reply_to IS NULL OR TRIM(in_reply_to) = '')
  AND (email_references IS NULL OR TRIM(email_references) = '')
  AND message_id IS NOT NULL
  -- stricter than TRIM(message_id) != '': also rejects ids made only of
  -- brackets, tabs or newlines
  AND REGEXP_REPLACE(message_id, '[<>\s]', '', 'g') != '';