-- ============================================= -- 清理重复的 study_instance_uid 记录 -- 保留最早创建的那条,删除其他重复的 -- 日期:2025-12-30 -- ============================================= USE qconline; -- ============================================= -- 方案1:查看重复数据(安全,不删除) -- ============================================= SELECT '=== 重复数据报告 ===' AS ''; SELECT institution_id, study_instance_uid, COUNT(*) as duplicate_count, GROUP_CONCAT(study_id ORDER BY create_time) as all_study_ids, GROUP_CONCAT(id ORDER BY create_time) as all_ids, MIN(create_time) as first_created, MAX(create_time) as last_created, MIN(study_id) as first_study_id, MIN(id) as first_id FROM study_info GROUP BY institution_id, study_instance_uid HAVING COUNT(*) > 1; -- ============================================= -- 方案2:标记要删除的重复记录(安全,不实际删除) -- ============================================= SELECT '=== 标记要删除的重复记录 ===' AS ''; -- 创建临时表存储要保留的记录ID DROP TEMPORARY TABLE IF EXISTS temp_keep_ids; CREATE TEMPORARY TABLE temp_keep_ids AS SELECT MIN(id) as keep_id FROM study_info GROUP BY institution_id, study_instance_uid HAVING COUNT(*) > 1; -- 查看将被删除的记录 SELECT s.id, s.institution_id, s.study_instance_uid, s.study_id, s.create_time FROM study_info s WHERE s.id NOT IN (SELECT keep_id FROM temp_keep_ids) AND EXISTS ( SELECT 1 FROM study_info s2 WHERE s2.institution_id = s.institution_id AND s2.study_instance_uid = s.study_instance_uid GROUP BY s2.institution_id, s2.study_instance_uid HAVING COUNT(*) > 1 ) ORDER BY s.institution_id, s.study_instance_uid, s.create_time; -- ============================================= -- 方案3:备份重复记录到历史表(推荐) -- ============================================= SELECT '=== 备份重复记录到历史表 ===' AS ''; -- 创建备份表(如果不存在) CREATE TABLE IF NOT EXISTS study_info_backup_20251230 ( LIKE study_info ); -- 插入重复记录到备份表 INSERT INTO study_info_backup_20251230 SELECT * FROM study_info WHERE id NOT IN (SELECT keep_id FROM temp_keep_ids) AND EXISTS ( SELECT 1 FROM study_info s2 WHERE s2.institution_id = study_info.institution_id AND s2.study_instance_uid = study_info.study_instance_uid GROUP BY s2.institution_id, s2.study_instance_uid HAVING COUNT(*) > 1 ); SELECT CONCAT('已备份 ', ROW_COUNT(), ' 条重复记录到 study_info_backup_20251230') AS ''; -- 查看备份的记录数 SELECT COUNT(*) as backed_up_count FROM study_info_backup_20251230; -- ============================================= -- 方案4:删除重复记录(谨慎使用) -- ============================================= -- ⚠️ 警告:此操作会永久删除数据,请先备份! -- ⚠️ 建议先执行方案1-3,确认无误后再执行本方案 SELECT '=== 删除重复记录(谨慎使用)===' AS ''; -- 取消下面的注释来执行删除操作 /* -- 删除重复记录(保留最早的那条) DELETE FROM study_info WHERE id NOT IN (SELECT keep_id FROM temp_keep_ids) AND EXISTS ( SELECT 1 FROM study_info s2 WHERE s2.institution_id = study_info.institution_id AND s2.study_instance_uid = study_info.study_instance_uid GROUP BY s2.institution_id, s2.study_instance_uid HAVING COUNT(*) > 1 ); SELECT CONCAT('已删除 ', ROW_COUNT(), ' 条重复记录') AS ''; */ -- ============================================= -- 方案5:智能合并重复记录(高级) -- ============================================= SELECT '=== 智能合并重复记录说明 ===' AS ''; /* 智能合并策略: 1. 保留最早创建的记录作为主记录 2. 合并统计信息: - series_count = MAX(series_count) - image_count = SUM(image_count) 3. 合并文件列表(如果 dicom_file_path 不同) 4. 更新 update_time 为当前时间 5. 删除其他重复记录 ⚠️ 此操作较复杂,建议手动编写存储过程执行 */ -- ============================================= -- 验证清理结果 -- ============================================= SELECT '=== 验证是否还有重复 ===' AS ''; SELECT institution_id, study_instance_uid, COUNT(*) as count FROM study_info GROUP BY institution_id, study_instance_uid HAVING COUNT(*) > 1; -- 如果上面的查询结果为空,说明清理成功 -- ============================================= -- 清理建议 -- ============================================= /* 执行顺序建议: 1. 先执行方案1,查看重复数据情况 2. 执行方案2,查看哪些记录会被删除 3. 执行方案3,备份重复记录 4. 检查备份表数据,确认可以删除 5. 执行方案4,删除重复记录 6. 再次执行方案1的验证查询,确认无重复 7. 执行 add_study_unique_constraint.sql 添加唯一约束 注意事项: - 删除前务必备份! - 建议在测试环境先执行 - 可以保留备份表一段时间,确认无问题后再删除 - 删除操作会影响相关的 series_info 记录,需要级联处理 */