mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 19:36:56 -08:00
comment ranking using confidence formula
This commit is contained in:
parent
b83ae5fa2f
commit
f26ce95864
6 changed files with 71 additions and 55 deletions
|
@ -21,7 +21,7 @@ from io import BytesIO
|
|||
|
||||
from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
|
||||
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
|
||||
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction
|
||||
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
|
||||
|
||||
|
||||
def public_key():
|
||||
|
@ -782,6 +782,7 @@ def downvote_post_reply(comment, user):
|
|||
db.session.add(vote)
|
||||
else:
|
||||
pass # they have already downvoted this reply
|
||||
comment.ranking = confidence(comment.up_votes, comment.down_votes)
|
||||
|
||||
|
||||
def upvote_post_reply(comment, user):
|
||||
|
@ -818,6 +819,7 @@ def upvote_post_reply(comment, user):
|
|||
db.session.add(vote)
|
||||
else:
|
||||
pass # they have already upvoted this reply
|
||||
comment.ranking = confidence(comment.up_votes, comment.down_votes)
|
||||
|
||||
|
||||
def upvote_post(post, user):
|
||||
|
@ -961,7 +963,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
|
|||
db.session.add(vote)
|
||||
post_reply.up_votes += 1
|
||||
post_reply.score += 1
|
||||
post_reply.ranking += 1
|
||||
post_reply.ranking = confidence(post_reply.up_votes, post_reply.down_votes)
|
||||
db.session.commit()
|
||||
else:
|
||||
activity_log.exception_message = 'Comments disabled, reply discarded'
|
||||
|
|
|
@ -712,7 +712,7 @@ class PostReply(db.Model):
|
|||
from_bot = db.Column(db.Boolean, default=False)
|
||||
up_votes = db.Column(db.Integer, default=0)
|
||||
down_votes = db.Column(db.Integer, default=0)
|
||||
ranking = db.Column(db.Integer, default=0, index=True) # used for 'hot' sorting
|
||||
ranking = db.Column(db.Float, default=0.0, index=True) # used for 'hot' sorting
|
||||
language = db.Column(db.String(10))
|
||||
edited_at = db.Column(db.DateTime)
|
||||
reports = db.Column(db.Integer, default=0) # how many times this post has been reported. Set to -1 to ignore reports
|
||||
|
|
|
@ -19,7 +19,7 @@ from app.post import bp
|
|||
from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
|
||||
shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \
|
||||
request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
|
||||
reply_already_exists, reply_is_just_link_to_gif_reaction
|
||||
reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
|
||||
|
||||
|
||||
def show_post(post_id: int):
|
||||
|
@ -337,6 +337,7 @@ def comment_vote(comment_id, vote_direction):
|
|||
|
||||
current_user.last_seen = utcnow()
|
||||
current_user.ip_address = ip_address()
|
||||
comment.ranking = confidence(comment.up_votes, comment.down_votes)
|
||||
db.session.commit()
|
||||
current_user.recalculate_attitude()
|
||||
db.session.commit()
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
# from https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
|
||||
|
||||
from math import sqrt, log
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
|
||||
# Reference instant for all ranking timestamps: the Unix epoch as a naive datetime.
epoch = datetime(1970, 1, 1)


def epoch_seconds(date):
    """Return the number of seconds from the Unix epoch to *date* as a float.

    Naive datetimes are measured directly against the naive ``epoch``.
    Timezone-aware datetimes are first converted to their UTC wall-clock time,
    because subtracting a naive datetime from an aware one raises ``TypeError``.

    :param date: a ``datetime`` (naive or timezone-aware).
    :return: seconds since 1970-01-01 00:00:00, negative for earlier instants.
    """
    offset = date.utcoffset()
    if offset is not None:
        # Drop the tzinfo and shift by the UTC offset to get a naive UTC value.
        date = date.replace(tzinfo=None) - offset
    # total_seconds() folds days, seconds and microseconds into one float.
    return (date - epoch).total_seconds()
|
||||
|
||||
|
||||
def score(ups, downs):
    """Return the net vote score: upvotes minus downvotes."""
    net = ups - downs
    return net
|
||||
|
||||
|
||||
# used for ranking stories
|
||||
def hot(ups, downs, date):
    """Return the 'hot' ranking value for a story.

    The result combines the base-10 logarithm of the absolute net score
    (signed by whether the score is positive or negative) with the age of
    *date* relative to a fixed reference instant, rounded to 7 decimal places.

    :param ups: number of upvotes.
    :param downs: number of downvotes.
    :param date: datetime passed on to ``epoch_seconds``.
    """
    net = score(ups, downs)

    if net > 0:
        direction = 1
    elif net < 0:
        direction = -1
    else:
        direction = 0

    # Clamp to 1 so a zero score yields log(1) == 0 rather than a math error.
    magnitude = log(max(abs(net), 1), 10)

    # The subtracted constant seems to be an arbitrary time in 2005.
    age = epoch_seconds(date) - 1134028003

    return round(direction * magnitude + age / 45000, 7)
|
||||
|
||||
|
||||
# used for ranking comments
|
||||
def _confidence(ups, downs):
    """Return the lower bound of the Wilson score interval for the upvote ratio.

    Expects non-negative counts; ``confidence`` is the public entry point and
    sanitises its arguments before calling this helper.

    :param ups: number of upvotes (>= 0).
    :param downs: number of downvotes (>= 0).
    :return: a float, or 0.0 when there are no votes at all.
    """
    n = ups + downs

    if n == 0:
        return 0.0

    # z is approximately the 0.90 quantile of the standard normal distribution.
    z = 1.281551565545
    p = float(ups) / n  # observed fraction of upvotes

    left = p + 1 / (2 * n) * z * z
    right = z * sqrt(p * (1 - p) / n + z * z / (4 * n * n))
    under = 1 + 1 / n * z * z

    return (left - right) / under


def confidence(ups, downs):
    """Return the confidence-based ranking value for a comment.

    Missing (``None``) or negative counts are treated as zero. Without this,
    ``None`` raises ``TypeError`` on addition and a negative count can make the
    term under the square root negative, raising ``ValueError``.

    :param ups: number of upvotes (may be ``None``).
    :param downs: number of downvotes (may be ``None``).
    :return: 0.0 when there are no usable votes, otherwise the Wilson lower bound.
    """
    if ups is None or ups < 0:
        ups = 0
    if downs is None or downs < 0:
        downs = 0

    if ups + downs == 0:
        return 0.0
    return _confidence(ups, downs)
|
27
app/utils.py
27
app/utils.py
|
@ -451,6 +451,7 @@ def reply_already_exists(user_id, post_id, parent_id, body) -> bool:
|
|||
def reply_is_just_link_to_gif_reaction(body) -> bool:
|
||||
tmp_body = body.strip()
|
||||
if tmp_body.startswith('https://media.tenor.com/') or \
|
||||
tmp_body.startswith('https://i.giphy.com/') or \
|
||||
tmp_body.startswith('https://media1.giphy.com/') or \
|
||||
tmp_body.startswith('https://media2.giphy.com/') or \
|
||||
tmp_body.startswith('https://media3.giphy.com/') or \
|
||||
|
@ -480,9 +481,9 @@ def awaken_dormant_instance(instance):
|
|||
db.session.commit()
|
||||
|
||||
|
||||
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
|
||||
# Reference instant for all ranking timestamps: the Unix epoch as a naive datetime.
epoch = datetime(1970, 1, 1)


def epoch_seconds(date):
    """Return the number of seconds from the Unix epoch to *date* as a float.

    Naive datetimes are measured directly against the naive ``epoch``.
    Timezone-aware datetimes are first converted to their UTC wall-clock time,
    because subtracting a naive datetime from an aware one raises ``TypeError``.

    :param date: a ``datetime`` (naive or timezone-aware).
    :return: seconds since 1970-01-01 00:00:00, negative for earlier instants.
    """
    offset = date.utcoffset()
    if offset is not None:
        # Drop the tzinfo and shift by the UTC offset to get a naive UTC value.
        date = date.replace(tzinfo=None) - offset
    # total_seconds() folds days, seconds and microseconds into one float.
    return (date - epoch).total_seconds()
|
||||
|
@ -497,3 +498,27 @@ def post_ranking(score, date: datetime):
|
|||
sign = 1 if score > 0 else -1 if score < 0 else 0
|
||||
seconds = epoch_seconds(date) - 1685766018
|
||||
return round(sign * order + seconds / 45000, 7)
|
||||
|
||||
|
||||
# used for ranking comments
|
||||
def _confidence(ups, downs):
    """Return the lower bound of the Wilson score interval for the upvote ratio.

    Expects non-negative counts; ``confidence`` is the public entry point and
    sanitises its arguments before calling this helper.

    :param ups: number of upvotes (>= 0).
    :param downs: number of downvotes (>= 0).
    :return: a float, or 0.0 when there are no votes at all.
    """
    n = ups + downs

    if n == 0:
        return 0.0

    # z is approximately the 0.90 quantile of the standard normal distribution.
    z = 1.281551565545
    p = float(ups) / n  # observed fraction of upvotes

    left = p + 1 / (2 * n) * z * z
    right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
    under = 1 + 1 / n * z * z

    return (left - right) / under


def confidence(ups, downs) -> float:
    """Return the confidence-based ranking value for a comment.

    Missing (``None``) or negative counts are treated as zero. Without this,
    ``None`` raises ``TypeError`` on addition and a negative count can make the
    term under the square root negative, raising ``ValueError``.

    :param ups: number of upvotes (may be ``None``).
    :param downs: number of downvotes (may be ``None``).
    :return: 0.0 when there are no usable votes, otherwise the Wilson lower bound.
    """
    if ups is None or ups < 0:
        ups = 0
    if downs is None or downs < 0:
        downs = 0

    if ups + downs == 0:
        return 0.0
    return _confidence(ups, downs)
|
||||
|
|
38
migrations/versions/5b4a967f9988_comment_ranking_float.py
Normal file
38
migrations/versions/5b4a967f9988_comment_ranking_float.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
"""comment ranking float
|
||||
|
||||
Revision ID: 5b4a967f9988
|
||||
Revises: dc49309fc13e
|
||||
Create Date: 2024-01-07 21:33:02.694552
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '5b4a967f9988'
|
||||
down_revision = 'dc49309fc13e'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Change the type of ``post_reply.ranking`` from INTEGER to Float."""
    # Originally auto-generated by Alembic.
    with op.batch_alter_table('post_reply', schema=None) as post_reply_table:
        post_reply_table.alter_column(
            'ranking',
            existing_type=sa.INTEGER(),
            type_=sa.Float(),
            existing_nullable=True,
        )
|
||||
|
||||
|
||||
def downgrade():
    """Change the type of ``post_reply.ranking`` back from Float to INTEGER."""
    # Originally auto-generated by Alembic.
    with op.batch_alter_table('post_reply', schema=None) as post_reply_table:
        post_reply_table.alter_column(
            'ranking',
            existing_type=sa.Float(),
            type_=sa.INTEGER(),
            existing_nullable=True,
        )
|
Loading…
Reference in a new issue