mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 19:36:56 -08:00
detect post vote manipulation using jaccard_similarity #343
This commit is contained in:
parent
4ed1232554
commit
7c8273158c
2 changed files with 35 additions and 1 deletions
22
app/cli.py
22
app/cli.py
|
@ -27,7 +27,7 @@ from app.models import Settings, BannedInstances, Interest, Role, User, RolePerm
|
||||||
from app.post.routes import post_delete_post
|
from app.post.routes import post_delete_post
|
||||||
from app.utils import file_get_contents, retrieve_block_list, blocked_domains, retrieve_peertube_block_list, \
|
from app.utils import file_get_contents, retrieve_block_list, blocked_domains, retrieve_peertube_block_list, \
|
||||||
shorten_string, get_request, html_to_text, blocked_communities, ap_datetime, gibberish, get_request_instance, \
|
shorten_string, get_request, html_to_text, blocked_communities, ap_datetime, gibberish, get_request_instance, \
|
||||||
instance_banned
|
instance_banned, recently_upvoted_post_replies, recently_upvoted_posts, jaccard_similarity
|
||||||
|
|
||||||
|
|
||||||
def register(app):
|
def register(app):
|
||||||
|
@ -464,6 +464,26 @@ def register(app):
|
||||||
db.session.query(ActivityPubLog).filter(ActivityPubLog.created_at < utcnow() - timedelta(days=3)).delete()
|
db.session.query(ActivityPubLog).filter(ActivityPubLog.created_at < utcnow() - timedelta(days=3)).delete()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
@app.cli.command("detect_vote_manipulation")
def detect_vote_manipulation():
    """Print pairs of recently active users whose recent upvotes are nearly identical.

    For every user seen in the last 7 days, the set of their recently upvoted
    posts and post replies is compared against every other such user with
    jaccard_similarity(). A pair is reported when the similarity is at least
    95 (percent). Users with 12 or fewer recent upvotes are not used as the
    first user of a pair, because a tiny sample makes a high overlap
    meaningless.

    Each matching pair is printed once per direction (A vs B and B vs A).
    """
    with app.app_context():
        print('Getting user ids...')
        # NOTE(review): other code in this file calls a utcnow() helper instead of
        # datetime.utcnow(); kept as-is because the helper's semantics are not visible here.
        active_since = datetime.utcnow() - timedelta(days=7)
        all_user_ids = [user.id for user in User.query.filter(User.last_seen > active_since)]
        print('Checking...')
        for first_user_id in all_user_ids:
            # Prefix the ids so a post and a reply sharing the same numeric id stay distinct.
            current_user_upvotes = {'post/' + str(post_id) for post_id in recently_upvoted_posts(first_user_id)}
            current_user_upvotes.update(
                'reply/' + str(reply_id) for reply_id in recently_upvoted_post_replies(first_user_id)
            )

            if len(current_user_upvotes) <= 12:
                continue

            for other_user_id in all_user_ids:
                # A user always matches themselves; skip before doing any queries
                # instead of computing the similarity and loading both users first.
                if other_user_id == first_user_id:
                    continue
                if jaccard_similarity(current_user_upvotes, other_user_id) >= 95:
                    first_user = User.query.get(first_user_id)
                    other_user = User.query.get(other_user_id)
                    print(f'{first_user.link()} votes the same as {other_user.link()}')
        print('Done')
|
||||||
|
|
||||||
@app.cli.command("migrate_community_notifs")
|
@app.cli.command("migrate_community_notifs")
|
||||||
def migrate_community_notifs():
|
def migrate_community_notifs():
|
||||||
with app.app_context():
|
with app.app_context():
|
||||||
|
|
14
app/utils.py
14
app/utils.py
|
@ -1249,3 +1249,17 @@ def community_ids_from_instances(instance_ids) -> List[int]:
|
||||||
def get_task_session() -> Session:
|
def get_task_session() -> Session:
|
||||||
# Use the same engine as the main app, but create an independent session
|
# Use the same engine as the main app, but create an independent session
|
||||||
return Session(bind=db.engine)
|
return Session(bind=db.engine)
|
||||||
|
|
||||||
|
|
||||||
|
def jaccard_similarity(user1_upvoted: set, user2_id: int, min_votes: int = 12) -> float:
    """Return how similar two users' recent upvotes are, as a percentage (0-100).

    The result is the Jaccard index (size of the intersection divided by the
    size of the union) of the two upvote sets, multiplied by 100.

    :param user1_upvoted: the first user's upvotes as a set of strings of the
        form 'post/<id>' and 'reply/<id>'. The same key format is built here
        for the second user, so the two sets are directly comparable.
    :param user2_id: id of the second user; their upvotes are looked up with
        recently_upvoted_posts() and recently_upvoted_post_replies().
    :param min_votes: the second user must have more than this many recent
        upvotes for a comparison to be made. Defaults to 12.
    :return: the similarity percentage, or 0.0 when the second user has
        min_votes or fewer recent upvotes.
    """
    user2_upvoted = {'post/' + str(post_id) for post_id in recently_upvoted_posts(user2_id)}
    user2_upvoted.update('reply/' + str(reply_id) for reply_id in recently_upvoted_post_replies(user2_id))

    # Too few votes to say anything meaningful about overlap.
    if len(user2_upvoted) <= min_votes:
        return 0.0

    shared = len(user1_upvoted & user2_upvoted)
    combined = len(user1_upvoted | user2_upvoted)  # never 0 here: user2_upvoted is non-empty

    return (shared / combined) * 100
|
||||||
|
|
Loading…
Reference in a new issue