| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- -- =============================================
- -- 清理重复的 study_instance_uid 记录
- -- 保留最早创建的那条,删除其他重复的
- -- 日期:2025-12-30
- -- =============================================
- USE qconline;
- -- =============================================
- -- 方案1:查看重复数据(安全,不删除)
- -- =============================================
- SELECT '=== 重复数据报告 ===' AS '';
- SELECT
- institution_id,
- study_instance_uid,
- COUNT(*) as duplicate_count,
- GROUP_CONCAT(study_id ORDER BY create_time) as all_study_ids,
- GROUP_CONCAT(id ORDER BY create_time) as all_ids,
- MIN(create_time) as first_created,
- MAX(create_time) as last_created,
- MIN(study_id) as first_study_id,
- MIN(id) as first_id
- FROM study_info
- GROUP BY institution_id, study_instance_uid
- HAVING COUNT(*) > 1;
- -- =============================================
- -- 方案2:标记要删除的重复记录(安全,不实际删除)
- -- =============================================
- SELECT '=== 标记要删除的重复记录 ===' AS '';
- -- 创建临时表存储要保留的记录ID
- DROP TEMPORARY TABLE IF EXISTS temp_keep_ids;
- CREATE TEMPORARY TABLE temp_keep_ids AS
- SELECT
- MIN(id) as keep_id
- FROM study_info
- GROUP BY institution_id, study_instance_uid
- HAVING COUNT(*) > 1;
- -- 查看将被删除的记录
- SELECT
- s.id,
- s.institution_id,
- s.study_instance_uid,
- s.study_id,
- s.create_time
- FROM study_info s
- WHERE s.id NOT IN (SELECT keep_id FROM temp_keep_ids)
- AND EXISTS (
- SELECT 1 FROM study_info s2
- WHERE s2.institution_id = s.institution_id
- AND s2.study_instance_uid = s.study_instance_uid
- GROUP BY s2.institution_id, s2.study_instance_uid
- HAVING COUNT(*) > 1
- )
- ORDER BY s.institution_id, s.study_instance_uid, s.create_time;
- -- =============================================
- -- 方案3:备份重复记录到历史表(推荐)
- -- =============================================
- SELECT '=== 备份重复记录到历史表 ===' AS '';
- -- 创建备份表(如果不存在)
- CREATE TABLE IF NOT EXISTS study_info_backup_20251230 (
- LIKE study_info
- );
- -- 插入重复记录到备份表
- INSERT INTO study_info_backup_20251230
- SELECT *
- FROM study_info
- WHERE id NOT IN (SELECT keep_id FROM temp_keep_ids)
- AND EXISTS (
- SELECT 1 FROM study_info s2
- WHERE s2.institution_id = study_info.institution_id
- AND s2.study_instance_uid = study_info.study_instance_uid
- GROUP BY s2.institution_id, s2.study_instance_uid
- HAVING COUNT(*) > 1
- );
- SELECT CONCAT('已备份 ', ROW_COUNT(), ' 条重复记录到 study_info_backup_20251230') AS '';
- -- 查看备份的记录数
- SELECT COUNT(*) as backed_up_count FROM study_info_backup_20251230;
- -- =============================================
- -- 方案4:删除重复记录(谨慎使用)
- -- =============================================
- -- ⚠️ 警告:此操作会永久删除数据,请先备份!
- -- ⚠️ 建议先执行方案1-3,确认无误后再执行本方案
- SELECT '=== 删除重复记录(谨慎使用)===' AS '';
- -- 取消下面的注释来执行删除操作
- /*
- -- 删除重复记录(保留最早的那条)
- DELETE FROM study_info
- WHERE id NOT IN (SELECT keep_id FROM temp_keep_ids)
- AND EXISTS (
- SELECT 1 FROM study_info s2
- WHERE s2.institution_id = study_info.institution_id
- AND s2.study_instance_uid = study_info.study_instance_uid
- GROUP BY s2.institution_id, s2.study_instance_uid
- HAVING COUNT(*) > 1
- );
- SELECT CONCAT('已删除 ', ROW_COUNT(), ' 条重复记录') AS '';
- */
- -- =============================================
- -- 方案5:智能合并重复记录(高级)
- -- =============================================
- SELECT '=== 智能合并重复记录说明 ===' AS '';
- /*
- 智能合并策略:
- 1. 保留最早创建的记录作为主记录
- 2. 合并统计信息:
- - series_count = MAX(series_count)
- - image_count = SUM(image_count)
- 3. 合并文件列表(如果 dicom_file_path 不同)
- 4. 更新 update_time 为当前时间
- 5. 删除其他重复记录
- ⚠️ 此操作较复杂,建议手动编写存储过程执行
- */
- -- =============================================
- -- 验证清理结果
- -- =============================================
- SELECT '=== 验证是否还有重复 ===' AS '';
- SELECT
- institution_id,
- study_instance_uid,
- COUNT(*) as count
- FROM study_info
- GROUP BY institution_id, study_instance_uid
- HAVING COUNT(*) > 1;
- -- 如果上面的查询结果为空,说明清理成功
- -- =============================================
- -- 清理建议
- -- =============================================
- /*
- 执行顺序建议:
- 1. 先执行方案1,查看重复数据情况
- 2. 执行方案2,查看哪些记录会被删除
- 3. 执行方案3,备份重复记录
- 4. 检查备份表数据,确认可以删除
- 5. 执行方案4,删除重复记录
- 6. 再次执行方案1的验证查询,确认无重复
- 7. 执行 add_study_unique_constraint.sql 添加唯一约束
- 注意事项:
- - 删除前务必备份!
- - 建议在测试环境先执行
- - 可以保留备份表一段时间,确认无问题后再删除
- - 删除操作会影响相关的 series_info 记录,需要级联处理
- */
|