comment ranking using confidence formula

2025-01-23 19:36:56 -08:00 · 2024-01-07 21:36:04 +13:00 · 2024-01-07 21:36:04 +13:00 · f26ce95864
commit f26ce95864
parent b83ae5fa2f
6 changed files with 71 additions and 55 deletions
--- a/app/activitypub/util.py
+++ b/app/activitypub/util.py
@ -21,7 +21,7 @@ from io import BytesIO

 from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
    is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
-    shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction
+    shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence


 def public_key():
@ -782,6 +782,7 @@ def downvote_post_reply(comment, user):
            db.session.add(vote)
        else:
            pass  # they have already downvoted this reply
+    comment.ranking = confidence(comment.up_votes, comment.down_votes)


 def upvote_post_reply(comment, user):
@ -818,6 +819,7 @@ def upvote_post_reply(comment, user):
            db.session.add(vote)
        else:
            pass  # they have already upvoted this reply
+    comment.ranking = confidence(comment.up_votes, comment.down_votes)


 def upvote_post(post, user):
@ -961,7 +963,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
                    db.session.add(vote)
                    post_reply.up_votes += 1
                    post_reply.score += 1
-                    post_reply.ranking += 1
+                    post_reply.ranking = confidence(post_reply.up_votes, post_reply.down_votes)
                    db.session.commit()
            else:
                activity_log.exception_message = 'Comments disabled, reply discarded'
--- a/app/models.py
+++ b/app/models.py
@ -712,7 +712,7 @@ class PostReply(db.Model):
    from_bot = db.Column(db.Boolean, default=False)
    up_votes = db.Column(db.Integer, default=0)
    down_votes = db.Column(db.Integer, default=0)
-    ranking = db.Column(db.Integer, default=0, index=True)  # used for 'hot' sorting
+    ranking = db.Column(db.Float, default=0.0, index=True)  # used for 'hot' sorting
    language = db.Column(db.String(10))
    edited_at = db.Column(db.DateTime)
    reports = db.Column(db.Integer, default=0)  # how many times this post has been reported. Set to -1 to ignore reports
--- a/app/post/routes.py
+++ b/app/post/routes.py
@ -19,7 +19,7 @@ from app.post import bp
 from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
    shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \
    request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
-    reply_already_exists, reply_is_just_link_to_gif_reaction
+    reply_already_exists, reply_is_just_link_to_gif_reaction, confidence


 def show_post(post_id: int):
@ -337,6 +337,7 @@ def comment_vote(comment_id, vote_direction):

    current_user.last_seen = utcnow()
    current_user.ip_address = ip_address()
+    comment.ranking = confidence(comment.up_votes, comment.down_votes)
    db.session.commit()
    current_user.recalculate_attitude()
    db.session.commit()
--- a/app/sorting.py
+++ b/app/sorting.py
@ -1,50 +0,0 @@
-# from https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
-
-from math import sqrt, log
-from datetime import datetime, timedelta
-
-
-
-epoch = datetime(1970, 1, 1)
-
-
-def epoch_seconds(date):
-    td = date - epoch
-    return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
-
-
-def score(ups, downs):
-    return ups - downs
-
-
-# used for ranking stories
-def hot(ups, downs, date):
-    s = score(ups, downs)
-    order = log(max(abs(s), 1), 10)
-    sign = 1 if s > 0 else -1 if s < 0 else 0
-    seconds = epoch_seconds(date) - 1134028003      # this value seems to be an arbitrary time in 2005.
-    return round(sign * order + seconds / 45000, 7)
-
-
-# used for ranking comments
-def _confidence(ups, downs):
-    n = ups + downs
-
-    if n == 0:
-        return 0
-
-    z = 1.281551565545
-    p = float(ups) / n
-
-    left = p + 1 / (2 * n) * z * z
-    right = z * sqrt(p * (1 - p) / n + z * z / (4 * n * n))
-    under = 1 + 1 / n * z * z
-
-    return (left - right) / under
-
-
-def confidence(ups, downs):
-    if ups + downs == 0:
-        return 0
-    else:
-        return _confidence(ups, downs)
--- a/app/utils.py
+++ b/app/utils.py
@ -451,6 +451,7 @@ def reply_already_exists(user_id, post_id, parent_id, body) -> bool:
 def reply_is_just_link_to_gif_reaction(body) -> bool:
    tmp_body = body.strip()
    if tmp_body.startswith('https://media.tenor.com/') or \
+            tmp_body.startswith('https://i.giphy.com/') or \
            tmp_body.startswith('https://media1.giphy.com/') or \
            tmp_body.startswith('https://media2.giphy.com/') or \
            tmp_body.startswith('https://media3.giphy.com/') or \
@ -480,9 +481,9 @@ def awaken_dormant_instance(instance):
            db.session.commit()


+# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
 epoch = datetime(1970, 1, 1)

-
 def epoch_seconds(date):
    td = date - epoch
    return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
@ -497,3 +498,27 @@ def post_ranking(score, date: datetime):
    sign = 1 if score > 0 else -1 if score < 0 else 0
    seconds = epoch_seconds(date) - 1685766018
    return round(sign * order + seconds / 45000, 7)
+
+
+# used for ranking comments: lower bound of the Wilson score interval on the fraction of upvotes
+def _confidence(ups, downs):
+    n = ups + downs  # total number of votes cast
+
+    if n == 0:
+        return 0.0  # no votes yet; also avoids dividing by zero below
+
+    z = 1.281551565545  # z-score; NOTE(review): looks like the 80% two-sided normal quantile - confirm
+    p = float(ups) / n  # observed fraction of upvotes
+
+    left = p + 1 / (2 * n) * z * z  # p + z^2 / (2n)
+    right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))  # z * sqrt(p(1-p)/n + z^2 / (4n^2))
+    under = 1 + 1 / n * z * z  # 1 + z^2 / n
+
+    return (left - right) / under
+
+
+def confidence(ups, downs) -> float:  # ranking value for a comment, computed from its up/down vote counts
+    if ups + downs == 0:  # NOTE(review): duplicates the n == 0 guard inside _confidence()
+        return 0.0
+    else:
+        return _confidence(ups, downs)
--- a/migrations/versions/5b4a967f9988_comment_ranking_float.py
+++ b/migrations/versions/5b4a967f9988_comment_ranking_float.py
@ -0,0 +1,38 @@
+"""comment ranking float
+
+Revision ID: 5b4a967f9988
+Revises: dc49309fc13e
+Create Date: 2024-01-07 21:33:02.694552
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '5b4a967f9988'
+down_revision = 'dc49309fc13e'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():  # change post_reply.ranking from INTEGER to Float
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('post_reply', schema=None) as batch_op:
+        batch_op.alter_column('ranking',  # only the column type is altered; no data is rewritten here
+               existing_type=sa.INTEGER(),
+               type_=sa.Float(),
+               existing_nullable=True)
+    # NOTE(review): existing rows keep their old integer rankings, which would sort above new confidence() values - consider a backfill
+    # ### end Alembic commands ###
+
+
+def downgrade():  # revert post_reply.ranking from Float back to INTEGER
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('post_reply', schema=None) as batch_op:
+        batch_op.alter_column('ranking',
+               existing_type=sa.Float(),
+               type_=sa.INTEGER(),  # NOTE(review): fractional ranking values cannot be kept as INTEGER - confirm this loss is acceptable
+               existing_nullable=True)
+
+    # ### end Alembic commands ###