mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 11:26:56 -08:00
merge duplicate posts #348
This commit is contained in:
parent
3b462377ce
commit
51fba1a156
2 changed files with 73 additions and 1 deletions
|
@ -1117,7 +1117,7 @@ class Post(db.Model):
|
|||
cross_posts = db.Column(MutableList.as_mutable(ARRAY(db.Integer)))
|
||||
tags = db.relationship('Tag', lazy='dynamic', secondary=post_tag, backref=db.backref('posts', lazy='dynamic'))
|
||||
|
||||
ap_id = db.Column(db.String(255), index=True)
|
||||
ap_id = db.Column(db.String(255), index=True, unique=True)
|
||||
ap_create_id = db.Column(db.String(100))
|
||||
ap_announce_id = db.Column(db.String(100))
|
||||
|
||||
|
|
72
migrations/versions/299e0384c8f3_unique_post_ap_id.py
Normal file
72
migrations/versions/299e0384c8f3_unique_post_ap_id.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
"""unique post ap id
|
||||
|
||||
Revision ID: 299e0384c8f3
|
||||
Revises: 7f7dfd4d4f1b
|
||||
Create Date: 2024-11-24 15:18:51.979847
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '299e0384c8f3'
|
||||
down_revision = '7f7dfd4d4f1b'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Merge posts that share an ap_id, then make ix_post_ap_id unique.

    For every non-NULL ap_id that appears on more than one row of "post",
    the row with the lowest id is kept. Replies pointing at the other rows
    are re-pointed to the kept row; rows in post_tag, read_posts, post_vote
    and report that reference the other rows are deleted, and finally the
    duplicate post rows themselves are deleted. Once no duplicates remain,
    the plain index on post.ap_id is replaced by a unique one.
    """
    conn = op.get_bind()

    def expanding(sql):
        # Render "IN :old_ids" through SQLAlchemy's expanding bind parameter
        # instead of binding a raw tuple, which depends on the DBAPI driver
        # knowing how to adapt a Python tuple into a SQL list.
        return text(sql).bindparams(sa.bindparam('old_ids', expanding=True))

    # Find ap_id values that occur on more than one post.
    dupes_query = text('''
        SELECT ap_id FROM "post"
        GROUP BY ap_id
        HAVING COUNT(*) > 1
    ''')

    # All posts sharing one ap_id, oldest (lowest id) first.
    post_ids_query = text('''
        SELECT id FROM "post"
        WHERE ap_id = :ap_id
        ORDER BY id
    ''')

    # Statements are loop-invariant, so build them once. Order matters:
    # dependent rows are moved/removed before the posts they reference.
    repoint_replies = expanding(
        'UPDATE "post_reply" SET post_id = :new_id WHERE post_id IN :old_ids')
    delete_dependents = [
        expanding('DELETE FROM "post_tag" WHERE post_id IN :old_ids'),
        expanding('DELETE FROM "read_posts" WHERE read_post_id IN :old_ids'),
        expanding('DELETE FROM "post_vote" WHERE post_id IN :old_ids'),
        expanding('DELETE FROM "report" WHERE suspect_post_id IN :old_ids'),
    ]
    delete_posts = expanding('DELETE FROM "post" WHERE id IN :old_ids')

    # Materialise the result before issuing further statements on the same
    # connection, so the outer iteration never depends on an open cursor.
    duplicate_ap_ids = conn.execute(dupes_query).scalars().all()
    print('Cleaning up duplicate posts, this may take a while...')

    for ap_id in duplicate_ap_ids:
        # GROUP BY puts all NULL ap_ids in one group; those rows are not
        # duplicates of each other and must be left alone.
        if ap_id is None:
            continue

        post_ids = conn.execute(post_ids_query, {"ap_id": ap_id}).scalars().all()
        if len(post_ids) < 2:
            # Nothing to merge (defensive: avoids indexing an empty result).
            continue

        # Keep the lowest id; every other id is merged into it and removed.
        new_id, *old_ids = post_ids

        print(ap_id)

        conn.execute(repoint_replies, {"new_id": new_id, "old_ids": old_ids})
        for statement in delete_dependents:
            conn.execute(statement, {"old_ids": old_ids})

        # Delete the duplicate posts
        conn.execute(delete_posts, {"old_ids": old_ids})

    # Swap the non-unique index for a unique one now that duplicates are gone.
    with op.batch_alter_table('post', schema=None) as batch_op:
        batch_op.drop_index('ix_post_ap_id')
        batch_op.create_index(batch_op.f('ix_post_ap_id'), ['ap_id'], unique=True)
|
||||
|
||||
|
||||
def downgrade():
    """Turn the unique index on post.ap_id back into a non-unique index.

    Only the index is changed; post rows removed by upgrade() are not
    restored here.
    """
    index_name = 'ix_post_ap_id'
    with op.batch_alter_table('post', schema=None) as post_table:
        # Drop the unique index, then recreate it under the same name
        # without the uniqueness constraint.
        post_table.drop_index(post_table.f(index_name))
        post_table.create_index(index_name, ['ap_id'], unique=False)
|
Loading…
Reference in a new issue