# Post-patch form of upgrade() from
# migrations/versions/26138ecda7c3_unique_user_ap_profile_id.py
# (patch subject: "faster migration #348"). The module's imports and
# revision identifiers lie outside the patch hunk; `op` and `text` are
# expected to be provided by that surrounding module.

# Every column that references "user".id and must be cleaned up before a
# duplicate user row can be removed, in execution order.
# Each entry is (table, column, reassign):
#   reassign=True  -> rows are repointed at the surviving (lowest-id) user
#   reassign=False -> rows are deleted instead of being moved
_DUPLICATE_USER_REFERENCES = (
    ("post", "user_id", True),
    ("post_vote", "user_id", False),
    ("post_vote", "author_id", False),
    ("post_reply", "user_id", True),
    ("post_reply_vote", "user_id", False),
    ("post_reply_vote", "author_id", False),
    ("notification", "user_id", True),
    ("notification", "author_id", True),
    ("notification_subscription", "user_id", True),
    ("community_member", "user_id", False),
    ("instance_role", "user_id", False),
    ("mod_log", "user_id", False),
    ("chat_message", "sender_id", True),
    ("conversation", "user_id", True),
    ("chat_message", "recipient_id", True),
    ("conversation_member", "user_id", False),
    ("community", "user_id", True),
    ("domain_block", "user_id", True),
    ("community_block", "user_id", True),
    ("user_follower", "local_user_id", True),
    ("user_follower", "remote_user_id", True),
    ("community_ban", "user_id", True),
    ("user_note", "target_id", True),
    ("user_block", "blocked_id", True),
    ("community_join_request", "user_id", True),
    ("filter", "user_id", True),
    ("report", "reporter_id", True),
    ("report", "suspect_user_id", True),
    ("user_follow_request", "follow_id", True),
)


def upgrade():
    """Merge users that share an ap_profile_id, then make its index unique.

    For every ap_profile_id that occurs more than once in "user", the row
    with the lowest id is kept. Rows in the tables listed in
    ``_DUPLICATE_USER_REFERENCES`` that point at the other ids are either
    repointed at the kept id or deleted, and the other user rows are then
    deleted. Groups whose ap_profile_id is NULL are skipped. Finally the
    ``ix_user_ap_profile_id`` index is dropped and recreated as unique.
    """
    # Function-scope import: only `op` and `text` are known to be imported
    # by the surrounding module.
    from sqlalchemy import bindparam

    def in_old_ids(sql):
        # An "expanding" bind parameter lets SQLAlchemy render the IN list
        # itself. Binding a tuple to a plain `IN :old_ids` only works on
        # DBAPI drivers that adapt Python tuples to SQL lists.
        return text(sql).bindparams(bindparam("old_ids", expanding=True))

    # Build each statement once; they are reused for every duplicate group.
    # Table and column names come only from the hard-coded table above, so
    # formatting them into the SQL text does not involve external input.
    merge_statements = []
    for table, column, reassign in _DUPLICATE_USER_REFERENCES:
        if reassign:
            sql = 'UPDATE "{0}" SET {1} = :new_id WHERE {1} IN :old_ids'.format(table, column)
        else:
            sql = 'DELETE FROM "{0}" WHERE {1} IN :old_ids'.format(table, column)
        merge_statements.append((in_old_ids(sql), reassign))
    delete_users = in_old_ids('DELETE FROM "user" WHERE id IN :old_ids')

    # Find ap_profile_id values shared by more than one user
    dupes_query = text('''
        SELECT ap_profile_id FROM "user"
        GROUP BY ap_profile_id
        HAVING COUNT(*) > 1
    ''')
    # Get all users with the same ap_profile_id, ordered by ID
    users_query = text('''
        SELECT id FROM "user"
        WHERE ap_profile_id = :ap_profile_id
        ORDER BY id
    ''')

    conn = op.get_bind()
    # Materialise the result up front so the loop below does not iterate a
    # pending result while issuing further statements on the same connection.
    duplicate_profiles = conn.execute(dupes_query).scalars().all()

    print('Cleaning up duplicate users, this may take a while...')

    for profile_id in duplicate_profiles:
        if profile_id is None:
            # All NULL ap_profile_id rows fall into one GROUP BY group;
            # they are not duplicates of each other.
            continue

        user_ids = conn.execute(users_query, {"ap_profile_id": profile_id}).scalars().all()
        if len(user_ids) < 2:
            # Nothing to merge (e.g. rows changed since the first query).
            continue

        # The lowest ID survives; every other ID is merged into it.
        new_id, *old_ids = user_ids

        for statement, reassign in merge_statements:
            params = {"old_ids": old_ids}
            if reassign:
                params["new_id"] = new_id
            conn.execute(statement, params)

        # Delete the duplicate users
        conn.execute(delete_users, {"old_ids": old_ids})

    # Finalize by dropping and recreating the unique index on ap_profile_id
    with op.batch_alter_table('user', schema=None) as batch_op:
        batch_op.drop_index('ix_user_ap_profile_id')
        batch_op.create_index(batch_op.f('ix_user_ap_profile_id'), ['ap_profile_id'], unique=True)