From c135b9c51f2fc67a4fedca3b44465baeb5ddd59e Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Sun, 24 Nov 2024 16:32:22 +1300 Subject: [PATCH] clean up duplicate instances #348 --- app/activitypub/util.py | 11 ++- app/models.py | 2 +- .../d88b49617de0_unique_instance_domain.py | 74 +++++++++++++++++++ 3 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 migrations/versions/d88b49617de0_unique_instance_domain.py diff --git a/app/activitypub/util.py b/app/activitypub/util.py index f0811f7a..5c1807a2 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -1208,7 +1208,7 @@ def find_reported_object(ap_id) -> Union[User, Post, PostReply, None]: def find_instance_id(server): - server = server.strip() + server = server.strip().lower() instance = Instance.query.filter_by(domain=server).first() if instance: return instance.id @@ -1216,8 +1216,13 @@ def find_instance_id(server): # Our instance does not know about {server} yet. Initially, create a sparse row in the 'instance' table and spawn a background # task to update the row with more details later new_instance = Instance(domain=server, software='unknown', created_at=utcnow(), trusted=server == 'piefed.social') - db.session.add(new_instance) - db.session.commit() + try: + db.session.add(new_instance) + db.session.commit() + except IntegrityError: + # Most likely another request inserted this domain first; roll back and use that row's id + db.session.rollback() + return Instance.query.filter_by(domain=server).one().id # Spawn background task to fill in more details new_instance_profile(new_instance.id) diff --git a/app/models.py b/app/models.py index 96bec31c..caf7a414 100644 --- a/app/models.py +++ b/app/models.py @@ -52,7 +52,7 @@ class AllowedInstances(db.Model): class Instance(db.Model): id = db.Column(db.Integer, primary_key=True) - domain = db.Column(db.String(256), index=True) + domain = db.Column(db.String(256), index=True, unique=True) inbox = db.Column(db.String(256)) shared_inbox = db.Column(db.String(256)) outbox = db.Column(db.String(256)) diff --git 
a/migrations/versions/d88b49617de0_unique_instance_domain.py b/migrations/versions/d88b49617de0_unique_instance_domain.py
new file mode 100644
index 00000000..a4d65bec
--- /dev/null
+++ b/migrations/versions/d88b49617de0_unique_instance_domain.py
@@ -0,0 +1,74 @@
+"""unique instance domain
+
+Revision ID: d88b49617de0
+Revises: c3cc707ab5e9
+Create Date: 2024-11-24 16:22:16.733285
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import bindparam, text
+
+# revision identifiers, used by Alembic.
+revision = 'd88b49617de0'
+down_revision = 'c3cc707ab5e9'
+branch_labels = None
+depends_on = None
+
+
+def _in_old_ids(sql):
+    """Wrap sql in text() with :old_ids as an expanding parameter, so a list can be bound to IN."""
+    return text(sql).bindparams(bindparam('old_ids', expanding=True))
+
+
+def upgrade():
+    """Merge "instance" rows that share a domain, then make the index on domain unique."""
+    conn = op.get_bind()
+    # Find every domain that is used by more than one instance row. The result is read in
+    # full up front because the loop below issues writes on this same connection.
+    dupes_query = text('SELECT domain FROM "instance" GROUP BY domain HAVING COUNT(*) > 1')
+    duplicate_domains = conn.execute(dupes_query).scalars().all()
+    ids_query = text('SELECT id FROM "instance" WHERE domain = :domain ORDER BY id')
+    print('Cleaning up duplicate instances...')
+
+    for domain in duplicate_domains:
+        if domain is None:
+            continue
+        # Keep the lowest ID as new_id, and collect the other IDs to update/delete
+        new_id, *old_ids = conn.execute(ids_query, {"domain": domain}).scalars().all()
+        print(domain)
+        if not old_ids:
+            continue
+
+        # Re-point rows that reference a duplicate at the instance being kept
+        repoint = {"new_id": new_id, "old_ids": old_ids}
+        conn.execute(_in_old_ids('UPDATE "community" SET instance_id = :new_id WHERE instance_id IN :old_ids'), repoint)
+        conn.execute(_in_old_ids('UPDATE "user" SET instance_id = :new_id WHERE instance_id IN :old_ids'), repoint)
+        conn.execute(_in_old_ids('UPDATE "post" SET instance_id = :new_id WHERE instance_id IN :old_ids'), repoint)
+        conn.execute(_in_old_ids('UPDATE "post_reply" SET instance_id = :new_id WHERE instance_id IN :old_ids'), repoint)
+        conn.execute(_in_old_ids('UPDATE "report" SET source_instance_id = :new_id WHERE source_instance_id IN :old_ids'), repoint)
+
+        # Roles and blocks recorded against a duplicate are dropped, not re-pointed
+        purge = {"old_ids": old_ids}
+        conn.execute(_in_old_ids('DELETE FROM "instance_role" WHERE instance_id IN :old_ids'), purge)
+        conn.execute(_in_old_ids('DELETE FROM "instance_block" WHERE instance_id IN :old_ids'), purge)
+
+        # Delete the duplicate instances
+        conn.execute(_in_old_ids('DELETE FROM "instance" WHERE id IN :old_ids'), purge)
+
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('instance', schema=None) as batch_op:
+        batch_op.drop_index('ix_instance_domain')
+        batch_op.create_index(batch_op.f('ix_instance_domain'), ['domain'], unique=True)
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """Replace the unique index on instance.domain with a non-unique one."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('instance', schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f('ix_instance_domain'))
+        batch_op.create_index('ix_instance_domain', ['domain'], unique=False)
+
+    # ### end Alembic commands ###