pyfedi/migrations/versions/299e0384c8f3_unique_post_ap_id.py

73 lines
2.6 KiB
Python
Raw Permalink Normal View History

2024-11-23 18:27:21 -08:00
"""unique post ap id
Revision ID: 299e0384c8f3
Revises: 7f7dfd4d4f1b
Create Date: 2024-11-24 15:18:51.979847
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
# revision identifiers, used by Alembic.
# `revision` and `down_revision` carry the same values as the "Revision ID"
# and "Revises" lines of the module docstring above.
revision = '299e0384c8f3'
down_revision = '7f7dfd4d4f1b'
branch_labels = None
depends_on = None
def upgrade():
    """Merge posts that share an ap_id, then make ix_post_ap_id unique.

    For every non-NULL ap_id that appears on more than one "post" row, the
    row with the lowest id is kept. Replies attached to the other rows are
    re-pointed at the kept row; the post_tag, read_posts, post_vote and
    report rows that reference the other rows are deleted, and then those
    duplicate "post" rows are deleted. Finally the ix_post_ap_id index is
    dropped and recreated with unique=True.
    """
    def with_old_ids(sql):
        # Bind :old_ids as an "expanding" parameter so SQLAlchemy renders the
        # IN (...) list itself rather than relying on the database driver to
        # adapt a Python tuple into SQL.
        return text(sql).bindparams(sa.bindparam('old_ids', expanding=True))

    # Find duplicate posts by ap_id
    dupes_query = text('''
        SELECT ap_id FROM "post"
        GROUP BY ap_id
        HAVING COUNT(*) > 1
    ''')
    # Get the ids of all posts with a given ap_id, ordered by ID
    posts_query = text('''
        SELECT id FROM "post"
        WHERE ap_id = :ap_id
        ORDER BY id
    ''')
    # Replies are preserved by moving them onto the surviving post.
    repoint_replies = with_old_ids('UPDATE "post_reply" SET post_id = :new_id WHERE post_id IN :old_ids')
    # Everything else that references a duplicate is removed; the duplicate
    # posts themselves go last, after the rows that reference them.
    delete_statements = [
        with_old_ids('DELETE FROM "post_tag" WHERE post_id IN :old_ids'),
        with_old_ids('DELETE FROM "read_posts" WHERE read_post_id IN :old_ids'),
        with_old_ids('DELETE FROM "post_vote" WHERE post_id IN :old_ids'),
        with_old_ids('DELETE FROM "report" WHERE suspect_post_id IN :old_ids'),
        with_old_ids('DELETE FROM "post" WHERE id IN :old_ids'),
    ]

    conn = op.get_bind()
    # Fetch the full list before looping: the loop body executes further
    # statements on this same connection.
    duplicate_ap_ids = conn.execute(dupes_query).scalars().all()

    print('Cleaning up duplicate posts, this may take a while...')
    for ap_id in duplicate_ap_ids:
        if ap_id is None:
            continue

        post_ids = conn.execute(posts_query, {"ap_id": ap_id}).scalars().all()
        print(ap_id)
        if len(post_ids) < 2:
            # Nothing to merge for this ap_id.
            continue

        # Set the lowest ID as the new_id, and collect other IDs to update/delete
        new_id, *old_ids = post_ids

        conn.execute(repoint_replies, {"new_id": new_id, "old_ids": old_ids})
        for statement in delete_statements:
            conn.execute(statement, {"old_ids": old_ids})

    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('post', schema=None) as batch_op:
        batch_op.drop_index('ix_post_ap_id')
        batch_op.create_index(batch_op.f('ix_post_ap_id'), ['ap_id'], unique=True)
    # ### end Alembic commands ###
def downgrade():
    """Replace the unique ix_post_ap_id index on "post" with a non-unique one.

    Only the index is changed; no rows are touched.
    """
    indexed_columns = ['ap_id']
    with op.batch_alter_table('post', schema=None) as post_batch:
        # Drop the existing index, then recreate it over the same column
        # without the uniqueness requirement.
        post_batch.drop_index(post_batch.f('ix_post_ap_id'))
        post_batch.create_index('ix_post_ap_id', indexed_columns, unique=False)