detect post vote manipulation using jaccard_similarity #343

This commit is contained in:
rimu 2024-12-23 13:08:20 +13:00
parent 4ed1232554
commit 7c8273158c
2 changed files with 35 additions and 1 deletions

View file

@ -27,7 +27,7 @@ from app.models import Settings, BannedInstances, Interest, Role, User, RolePerm
from app.post.routes import post_delete_post from app.post.routes import post_delete_post
from app.utils import file_get_contents, retrieve_block_list, blocked_domains, retrieve_peertube_block_list, \ from app.utils import file_get_contents, retrieve_block_list, blocked_domains, retrieve_peertube_block_list, \
shorten_string, get_request, html_to_text, blocked_communities, ap_datetime, gibberish, get_request_instance, \ shorten_string, get_request, html_to_text, blocked_communities, ap_datetime, gibberish, get_request_instance, \
instance_banned instance_banned, recently_upvoted_post_replies, recently_upvoted_posts, jaccard_similarity
def register(app): def register(app):
@ -464,6 +464,26 @@ def register(app):
db.session.query(ActivityPubLog).filter(ActivityPubLog.created_at < utcnow() - timedelta(days=3)).delete() db.session.query(ActivityPubLog).filter(ActivityPubLog.created_at < utcnow() - timedelta(days=3)).delete()
db.session.commit() db.session.commit()
@app.cli.command("detect_vote_manipulation")
def detect_vote_manipulation():
    """Print pairs of recently active users whose upvotes are nearly identical.

    Every user whose ``last_seen`` falls within the last 7 days is considered.
    For each such user with more than 12 upvoted posts/replies (as returned by
    ``recently_upvoted_posts`` and ``recently_upvoted_post_replies``), the set
    of upvotes is compared with every other candidate user via
    ``jaccard_similarity``; a score of 95 or higher is reported on stdout.

    Matches are reported in both directions (A vs B and B vs A), because each
    user takes a turn as the "first" user.
    """
    with app.app_context():
        print('Getting user ids...')
        active_since = datetime.utcnow() - timedelta(days=7)
        all_user_ids = [user.id for user in User.query.filter(User.last_seen > active_since)]

        print('Checking...')
        for first_user_id in all_user_ids:
            # Prefix the ids so a post and a reply sharing a numeric id stay distinct.
            current_user_upvotes = {'post/' + str(post_id)
                                    for post_id in recently_upvoted_posts(first_user_id)}
            current_user_upvotes.update('reply/' + str(reply_id)
                                        for reply_id in recently_upvoted_post_replies(first_user_id))

            # Only users with more than 12 upvotes are used as the comparison base.
            if len(current_user_upvotes) <= 12:
                continue

            first_user = None  # loaded lazily, only once a match is found
            for other_user_id in all_user_ids:
                # A user always matches themselves; skip before doing any queries.
                if other_user_id == first_user_id:
                    continue
                if jaccard_similarity(current_user_upvotes, other_user_id) >= 95:
                    if first_user is None:
                        first_user = User.query.get(first_user_id)
                    other_user = User.query.get(other_user_id)
                    print(f'{first_user.link()} votes the same as {other_user.link()}')
        print('Done')
@app.cli.command("migrate_community_notifs") @app.cli.command("migrate_community_notifs")
def migrate_community_notifs(): def migrate_community_notifs():
with app.app_context(): with app.app_context():

View file

@ -1249,3 +1249,17 @@ def community_ids_from_instances(instance_ids) -> List[int]:
def get_task_session() -> Session:
    """Return a newly constructed ``Session`` bound to ``db.engine``.

    The session shares the main app's engine but is an independent session
    object created on each call.
    """
    engine = db.engine
    return Session(bind=engine)
def jaccard_similarity(user1_upvoted: set, user2_id: int, min_votes: int = 12) -> float:
    """Return the Jaccard similarity of two users' upvotes as a percentage.

    Args:
        user1_upvoted: The first user's upvotes, as a set of strings of the
            form ``'post/<id>'`` and ``'reply/<id>'``.
        user2_id: Id of the second user; their upvotes are looked up with
            ``recently_upvoted_posts`` and ``recently_upvoted_post_replies``.
        min_votes: The second user must have strictly more than this many
            upvotes to be compared. Defaults to 12.

    Returns:
        ``len(intersection) / len(union) * 100`` for the two upvote sets, or
        ``0.0`` when the second user does not have more than ``min_votes``
        upvotes.
    """
    # Build the second user's set with the same 'post/' and 'reply/' prefixes
    # the caller is expected to have used for user1_upvoted.
    user2_upvoted = {'post/' + str(post_id) for post_id in recently_upvoted_posts(user2_id)}
    user2_upvoted.update('reply/' + str(reply_id)
                         for reply_id in recently_upvoted_post_replies(user2_id))

    # Users at or below the threshold are not compared and score zero.
    if len(user2_upvoted) <= min_votes:
        return 0.0

    shared = len(user1_upvoted & user2_upvoted)
    # With a non-negative min_votes the union cannot be empty here, because
    # user2_upvoted holds more than min_votes items.
    combined = len(user1_upvoted | user2_upvoted)
    return (shared / combined) * 100