comment ranking using confidence formula

This commit is contained in:
rimu 2024-01-07 21:36:04 +13:00
parent b83ae5fa2f
commit f26ce95864
6 changed files with 71 additions and 55 deletions

View file

@ -21,7 +21,7 @@ from io import BytesIO
from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \ from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
def public_key(): def public_key():
@ -782,6 +782,7 @@ def downvote_post_reply(comment, user):
db.session.add(vote) db.session.add(vote)
else: else:
pass # they have already downvoted this reply pass # they have already downvoted this reply
comment.ranking = confidence(comment.up_votes, comment.down_votes)
def upvote_post_reply(comment, user): def upvote_post_reply(comment, user):
@ -818,6 +819,7 @@ def upvote_post_reply(comment, user):
db.session.add(vote) db.session.add(vote)
else: else:
pass # they have already upvoted this reply pass # they have already upvoted this reply
comment.ranking = confidence(comment.up_votes, comment.down_votes)
def upvote_post(post, user): def upvote_post(post, user):
@ -961,7 +963,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
db.session.add(vote) db.session.add(vote)
post_reply.up_votes += 1 post_reply.up_votes += 1
post_reply.score += 1 post_reply.score += 1
post_reply.ranking += 1 post_reply.ranking = confidence(post_reply.up_votes, post_reply.down_votes)
db.session.commit() db.session.commit()
else: else:
activity_log.exception_message = 'Comments disabled, reply discarded' activity_log.exception_message = 'Comments disabled, reply discarded'

View file

@ -712,7 +712,7 @@ class PostReply(db.Model):
from_bot = db.Column(db.Boolean, default=False) from_bot = db.Column(db.Boolean, default=False)
up_votes = db.Column(db.Integer, default=0) up_votes = db.Column(db.Integer, default=0)
down_votes = db.Column(db.Integer, default=0) down_votes = db.Column(db.Integer, default=0)
ranking = db.Column(db.Integer, default=0, index=True) # used for 'hot' sorting ranking = db.Column(db.Float, default=0.0, index=True) # used for 'hot' sorting
language = db.Column(db.String(10)) language = db.Column(db.String(10))
edited_at = db.Column(db.DateTime) edited_at = db.Column(db.DateTime)
reports = db.Column(db.Integer, default=0) # how many times this post has been reported. Set to -1 to ignore reports reports = db.Column(db.Integer, default=0) # how many times this post has been reported. Set to -1 to ignore reports

View file

@ -19,7 +19,7 @@ from app.post import bp
from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \ from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \ shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \
request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \ request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
reply_already_exists, reply_is_just_link_to_gif_reaction reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
def show_post(post_id: int): def show_post(post_id: int):
@ -337,6 +337,7 @@ def comment_vote(comment_id, vote_direction):
current_user.last_seen = utcnow() current_user.last_seen = utcnow()
current_user.ip_address = ip_address() current_user.ip_address = ip_address()
comment.ranking = confidence(comment.up_votes, comment.down_votes)
db.session.commit() db.session.commit()
current_user.recalculate_attitude() current_user.recalculate_attitude()
db.session.commit() db.session.commit()

View file

@ -1,50 +0,0 @@
# from https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
from math import sqrt, log
from datetime import datetime, timedelta
# Reference instant (naive datetime) from which epoch_seconds() measures.
epoch = datetime(1970, 1, 1)


def epoch_seconds(date):
    """Return the number of seconds between ``epoch`` and ``date`` as a float.

    The result is negative when ``date`` is earlier than ``epoch``.
    ``epoch`` is a naive datetime, so ``date`` is expected to be naive too.
    """
    elapsed = date - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    fraction = float(elapsed.microseconds) / 1000000
    return whole_seconds + fraction
def score(ups, downs):
    """Return the net vote count: up-votes minus down-votes."""
    net = ups - downs
    return net
# used for ranking stories
def hot(ups, downs, date):
    """Return the 'hot' ranking value for an item.

    The value is the signed base-10 logarithm of the net score's magnitude
    plus an age term (seconds since a fixed offset, divided by 45000), so a
    later ``date`` always yields a larger result. Rounded to 7 decimals.

    :param ups: number of up-votes
    :param downs: number of down-votes
    :param date: datetime passed to ``epoch_seconds()``
    """
    net = score(ups, downs)
    if net > 0:
        direction = 1
    elif net < 0:
        direction = -1
    else:
        direction = 0
    # max(..., 1) keeps the logarithm defined (and zero) for a net score of 0.
    magnitude = log(max(abs(net), 1), 10)
    # this offset seems to be an arbitrary time in 2005.
    age = epoch_seconds(date) - 1134028003
    return round(direction * magnitude + age / 45000, 7)
# used for ranking comments
def _confidence(ups, downs):
    """Return the lower bound of the Wilson score interval for a vote tally.

    Callers must pass non-negative counts; ``confidence()`` takes care of
    sanitising raw values before delegating here.

    :param ups: number of up-votes (>= 0)
    :param downs: number of down-votes (>= 0)
    :return: 0 when there are no votes, otherwise a float lower bound on the
        up-vote proportion
    """
    n = ups + downs
    if n == 0:
        return 0
    # Standard normal quantile for a two-sided 80% confidence interval.
    z = 1.281551565545
    p = float(ups) / n  # observed proportion of up-votes
    left = p + 1 / (2 * n) * z * z
    right = z * sqrt(p * (1 - p) / n + z * z / (4 * n * n))
    under = 1 + 1 / n * z * z
    return (left - right) / under


def confidence(ups, downs):
    """Return the comment-ranking confidence score for a vote tally.

    ``None`` or negative counts are treated as zero. Without this, a missing
    counter raised TypeError and a negative counter could make the square
    root argument negative (ValueError: math domain error).

    :param ups: number of up-votes (may be ``None``)
    :param downs: number of down-votes (may be ``None``)
    :return: 0 when there are no (valid) votes, otherwise the Wilson lower
        bound computed by ``_confidence()``
    """
    ups = max(ups or 0, 0)
    downs = max(downs or 0, 0)
    # _confidence() already returns 0 for an empty tally, so no extra check.
    return _confidence(ups, downs)

View file

@ -451,6 +451,7 @@ def reply_already_exists(user_id, post_id, parent_id, body) -> bool:
def reply_is_just_link_to_gif_reaction(body) -> bool: def reply_is_just_link_to_gif_reaction(body) -> bool:
tmp_body = body.strip() tmp_body = body.strip()
if tmp_body.startswith('https://media.tenor.com/') or \ if tmp_body.startswith('https://media.tenor.com/') or \
tmp_body.startswith('https://i.giphy.com/') or \
tmp_body.startswith('https://media1.giphy.com/') or \ tmp_body.startswith('https://media1.giphy.com/') or \
tmp_body.startswith('https://media2.giphy.com/') or \ tmp_body.startswith('https://media2.giphy.com/') or \
tmp_body.startswith('https://media3.giphy.com/') or \ tmp_body.startswith('https://media3.giphy.com/') or \
@ -480,9 +481,9 @@ def awaken_dormant_instance(instance):
db.session.commit() db.session.commit()
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
epoch = datetime(1970, 1, 1) epoch = datetime(1970, 1, 1)
def epoch_seconds(date): def epoch_seconds(date):
td = date - epoch td = date - epoch
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
@ -497,3 +498,27 @@ def post_ranking(score, date: datetime):
sign = 1 if score > 0 else -1 if score < 0 else 0 sign = 1 if score > 0 else -1 if score < 0 else 0
seconds = epoch_seconds(date) - 1685766018 seconds = epoch_seconds(date) - 1685766018
return round(sign * order + seconds / 45000, 7) return round(sign * order + seconds / 45000, 7)
# used for ranking comments
def _confidence(ups, downs):
    """Return the lower bound of the Wilson score interval for a vote tally.

    Callers must pass non-negative counts; ``confidence()`` takes care of
    sanitising raw values before delegating here.

    :param ups: number of up-votes (>= 0)
    :param downs: number of down-votes (>= 0)
    :return: 0.0 when there are no votes, otherwise a float lower bound on
        the up-vote proportion
    """
    n = ups + downs
    if n == 0:
        return 0.0
    # Standard normal quantile for a two-sided 80% confidence interval.
    z = 1.281551565545
    p = float(ups) / n  # observed proportion of up-votes
    left = p + 1 / (2 * n) * z * z
    right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
    under = 1 + 1 / n * z * z
    return (left - right) / under


def confidence(ups, downs) -> float:
    """Return the comment-ranking confidence score for a vote tally.

    ``None`` or negative counts are treated as zero. Without this, a missing
    counter raised TypeError and a negative counter could make the square
    root argument negative (ValueError: math domain error).

    :param ups: number of up-votes (may be ``None``)
    :param downs: number of down-votes (may be ``None``)
    :return: 0.0 when there are no (valid) votes, otherwise the Wilson lower
        bound computed by ``_confidence()``
    """
    ups = max(ups or 0, 0)
    downs = max(downs or 0, 0)
    # _confidence() already returns 0.0 for an empty tally, so no extra check.
    return _confidence(ups, downs)

View file

@ -0,0 +1,38 @@
"""comment ranking float
Revision ID: 5b4a967f9988
Revises: dc49309fc13e
Create Date: 2024-01-07 21:33:02.694552
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '5b4a967f9988'
down_revision = 'dc49309fc13e'
branch_labels = None
depends_on = None
def upgrade():
    """Change the type of ``post_reply.ranking`` from INTEGER to Float."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('post_reply', schema=None) as post_reply_table:
        post_reply_table.alter_column(
            'ranking',
            existing_type=sa.INTEGER(),
            type_=sa.Float(),
            existing_nullable=True,
        )

    # ### end Alembic commands ###
def downgrade():
    """Change the type of ``post_reply.ranking`` back from Float to INTEGER.

    NOTE(review): fractional ranking values presumably cannot survive this
    conversion; how they are coerced depends on the database backend.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('post_reply', schema=None) as post_reply_table:
        post_reply_table.alter_column(
            'ranking',
            existing_type=sa.Float(),
            type_=sa.INTEGER(),
            existing_nullable=True,
        )

    # ### end Alembic commands ###