From f26ce95864b3e137814fce7c8b460ce88fb26a37 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Sun, 7 Jan 2024 21:36:04 +1300 Subject: [PATCH] comment ranking using confidence formula --- app/activitypub/util.py | 6 ++- app/models.py | 2 +- app/post/routes.py | 3 +- app/sorting.py | 50 ------------------- app/utils.py | 27 +++++++++- .../5b4a967f9988_comment_ranking_float.py | 38 ++++++++++++++ 6 files changed, 71 insertions(+), 55 deletions(-) delete mode 100644 app/sorting.py create mode 100644 migrations/versions/5b4a967f9988_comment_ranking_float.py diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 324ba02d..5f7e5fd4 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -21,7 +21,7 @@ from io import BytesIO from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ - shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction + shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence def public_key(): @@ -782,6 +782,7 @@ def downvote_post_reply(comment, user): db.session.add(vote) else: pass # they have already downvoted this reply + comment.ranking = confidence(comment.up_votes, comment.down_votes) def upvote_post_reply(comment, user): @@ -818,6 +819,7 @@ def upvote_post_reply(comment, user): db.session.add(vote) else: pass # they have already upvoted this reply + comment.ranking = confidence(comment.up_votes, comment.down_votes) def upvote_post(post, user): @@ -961,7 +963,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep db.session.add(vote) post_reply.up_votes += 1 post_reply.score += 1 - post_reply.ranking += 1 + post_reply.ranking = confidence(post_reply.up_votes, post_reply.down_votes) db.session.commit() else: activity_log.exception_message = 'Comments disabled, reply discarded' diff --git a/app/models.py b/app/models.py index 6d6bd9e5..3e398311 100644 --- a/app/models.py +++ b/app/models.py @@ -712,7 +712,7 @@ class PostReply(db.Model): from_bot = db.Column(db.Boolean, default=False) up_votes = db.Column(db.Integer, default=0) down_votes = db.Column(db.Integer, default=0) - ranking = db.Column(db.Integer, default=0, index=True) # used for 'hot' sorting + ranking = db.Column(db.Float, default=0.0, index=True) # used for 'hot' sorting language = db.Column(db.String(10)) edited_at = db.Column(db.DateTime) reports = db.Column(db.Integer, default=0) # how many times this post has been reported. Set to -1 to ignore reports diff --git a/app/post/routes.py b/app/post/routes.py index 19f02a6c..8ba7f3d3 100644 --- a/app/post/routes.py +++ b/app/post/routes.py @@ -19,7 +19,7 @@ from app.post import bp from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \ shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \ request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \ - reply_already_exists, reply_is_just_link_to_gif_reaction + reply_already_exists, reply_is_just_link_to_gif_reaction, confidence def show_post(post_id: int): @@ -337,6 +337,7 @@ def comment_vote(comment_id, vote_direction): current_user.last_seen = utcnow() current_user.ip_address = ip_address() + comment.ranking = confidence(comment.up_votes, comment.down_votes) db.session.commit() current_user.recalculate_attitude() db.session.commit() diff --git a/app/sorting.py b/app/sorting.py deleted file mode 100644 index 441bd64b..00000000 --- a/app/sorting.py +++ /dev/null @@ -1,50 +0,0 @@ -# from https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9 - -from math import sqrt, log -from datetime import datetime, timedelta - - - -epoch = datetime(1970, 1, 1) - - -def epoch_seconds(date): - td = date - epoch - return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) - - -def score(ups, downs): - return ups - downs - - -# used for ranking stories -def hot(ups, downs, date): - s = score(ups, downs) - order = log(max(abs(s), 1), 10) - sign = 1 if s > 0 else -1 if s < 0 else 0 - seconds = epoch_seconds(date) - 1134028003 # this value seems to be an arbitrary time in 2005. - return round(sign * order + seconds / 45000, 7) - - -# used for ranking comments -def _confidence(ups, downs): - n = ups + downs - - if n == 0: - return 0 - - z = 1.281551565545 - p = float(ups) / n - - left = p + 1 / (2 * n) * z * z - right = z * sqrt(p * (1 - p) / n + z * z / (4 * n * n)) - under = 1 + 1 / n * z * z - - return (left - right) / under - - -def confidence(ups, downs): - if ups + downs == 0: - return 0 - else: - return _confidence(ups, downs) \ No newline at end of file diff --git a/app/utils.py b/app/utils.py index fa14d1bc..745490fb 100644 --- a/app/utils.py +++ b/app/utils.py @@ -451,6 +451,7 @@ def reply_already_exists(user_id, post_id, parent_id, body) -> bool: def reply_is_just_link_to_gif_reaction(body) -> bool: tmp_body = body.strip() if tmp_body.startswith('https://media.tenor.com/') or \ + tmp_body.startswith('https://i.giphy.com/') or \ tmp_body.startswith('https://media1.giphy.com/') or \ tmp_body.startswith('https://media2.giphy.com/') or \ tmp_body.startswith('https://media3.giphy.com/') or \ @@ -480,9 +481,9 @@ def awaken_dormant_instance(instance): db.session.commit() +# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9 epoch = datetime(1970, 1, 1) - def epoch_seconds(date): td = date - epoch return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) @@ -497,3 +498,27 @@ def post_ranking(score, date: datetime): sign = 1 if score > 0 else -1 if score < 0 else 0 seconds = epoch_seconds(date) - 1685766018 return round(sign * order + seconds / 45000, 7) + + +# used for ranking comments +def _confidence(ups, downs): + n = ups + downs + + if n == 0: + return 0.0 + + z = 1.281551565545 + p = float(ups) / n + + left = p + 1 / (2 * n) * z * z + right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n)) + under = 1 + 1 / n * z * z + + return (left - right) / under + + +def confidence(ups, downs) -> float: + if ups + downs == 0: + return 0.0 + else: + return _confidence(ups, downs) diff --git a/migrations/versions/5b4a967f9988_comment_ranking_float.py b/migrations/versions/5b4a967f9988_comment_ranking_float.py new file mode 100644 index 00000000..b613fd05 --- /dev/null +++ b/migrations/versions/5b4a967f9988_comment_ranking_float.py @@ -0,0 +1,38 @@ +"""comment ranking float + +Revision ID: 5b4a967f9988 +Revises: dc49309fc13e +Create Date: 2024-01-07 21:33:02.694552 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '5b4a967f9988' +down_revision = 'dc49309fc13e' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('post_reply', schema=None) as batch_op: + batch_op.alter_column('ranking', + existing_type=sa.INTEGER(), + type_=sa.Float(), + existing_nullable=True) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('post_reply', schema=None) as batch_op: + batch_op.alter_column('ranking', + existing_type=sa.Float(), + type_=sa.INTEGER(), + existing_nullable=True) + + # ### end Alembic commands ###