mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 19:36:56 -08:00
comment ranking using confidence formula
This commit is contained in:
parent
b83ae5fa2f
commit
f26ce95864
6 changed files with 71 additions and 55 deletions
|
@ -21,7 +21,7 @@ from io import BytesIO
|
||||||
|
|
||||||
from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
|
from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
|
||||||
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
|
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
|
||||||
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction
|
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
|
||||||
|
|
||||||
|
|
||||||
def public_key():
|
def public_key():
|
||||||
|
@ -782,6 +782,7 @@ def downvote_post_reply(comment, user):
|
||||||
db.session.add(vote)
|
db.session.add(vote)
|
||||||
else:
|
else:
|
||||||
pass # they have already downvoted this reply
|
pass # they have already downvoted this reply
|
||||||
|
comment.ranking = confidence(comment.up_votes, comment.down_votes)
|
||||||
|
|
||||||
|
|
||||||
def upvote_post_reply(comment, user):
|
def upvote_post_reply(comment, user):
|
||||||
|
@ -818,6 +819,7 @@ def upvote_post_reply(comment, user):
|
||||||
db.session.add(vote)
|
db.session.add(vote)
|
||||||
else:
|
else:
|
||||||
pass # they have already upvoted this reply
|
pass # they have already upvoted this reply
|
||||||
|
comment.ranking = confidence(comment.up_votes, comment.down_votes)
|
||||||
|
|
||||||
|
|
||||||
def upvote_post(post, user):
|
def upvote_post(post, user):
|
||||||
|
@ -961,7 +963,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
|
||||||
db.session.add(vote)
|
db.session.add(vote)
|
||||||
post_reply.up_votes += 1
|
post_reply.up_votes += 1
|
||||||
post_reply.score += 1
|
post_reply.score += 1
|
||||||
post_reply.ranking += 1
|
post_reply.ranking = confidence(post_reply.up_votes, post_reply.down_votes)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
else:
|
else:
|
||||||
activity_log.exception_message = 'Comments disabled, reply discarded'
|
activity_log.exception_message = 'Comments disabled, reply discarded'
|
||||||
|
|
|
@ -712,7 +712,7 @@ class PostReply(db.Model):
|
||||||
from_bot = db.Column(db.Boolean, default=False)
|
from_bot = db.Column(db.Boolean, default=False)
|
||||||
up_votes = db.Column(db.Integer, default=0)
|
up_votes = db.Column(db.Integer, default=0)
|
||||||
down_votes = db.Column(db.Integer, default=0)
|
down_votes = db.Column(db.Integer, default=0)
|
||||||
ranking = db.Column(db.Integer, default=0, index=True) # used for 'hot' sorting
|
ranking = db.Column(db.Float, default=0.0, index=True) # used for 'hot' sorting
|
||||||
language = db.Column(db.String(10))
|
language = db.Column(db.String(10))
|
||||||
edited_at = db.Column(db.DateTime)
|
edited_at = db.Column(db.DateTime)
|
||||||
reports = db.Column(db.Integer, default=0) # how many times this post has been reported. Set to -1 to ignore reports
|
reports = db.Column(db.Integer, default=0) # how many times this post has been reported. Set to -1 to ignore reports
|
||||||
|
|
|
@ -19,7 +19,7 @@ from app.post import bp
|
||||||
from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
|
from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
|
||||||
shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \
|
shorten_string, markdown_to_text, domain_from_url, validate_image, gibberish, ap_datetime, return_304, \
|
||||||
request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
|
request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
|
||||||
reply_already_exists, reply_is_just_link_to_gif_reaction
|
reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
|
||||||
|
|
||||||
|
|
||||||
def show_post(post_id: int):
|
def show_post(post_id: int):
|
||||||
|
@ -337,6 +337,7 @@ def comment_vote(comment_id, vote_direction):
|
||||||
|
|
||||||
current_user.last_seen = utcnow()
|
current_user.last_seen = utcnow()
|
||||||
current_user.ip_address = ip_address()
|
current_user.ip_address = ip_address()
|
||||||
|
comment.ranking = confidence(comment.up_votes, comment.down_votes)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
current_user.recalculate_attitude()
|
current_user.recalculate_attitude()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
|
@ -1,50 +0,0 @@
|
||||||
# from https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
|
|
||||||
|
|
||||||
from math import sqrt, log
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
epoch = datetime(1970, 1, 1)
|
|
||||||
|
|
||||||
|
|
||||||
def epoch_seconds(date):
|
|
||||||
td = date - epoch
|
|
||||||
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
|
|
||||||
|
|
||||||
|
|
||||||
def score(ups, downs):
|
|
||||||
return ups - downs
|
|
||||||
|
|
||||||
|
|
||||||
# used for ranking stories
|
|
||||||
def hot(ups, downs, date):
|
|
||||||
s = score(ups, downs)
|
|
||||||
order = log(max(abs(s), 1), 10)
|
|
||||||
sign = 1 if s > 0 else -1 if s < 0 else 0
|
|
||||||
seconds = epoch_seconds(date) - 1134028003 # this value seems to be an arbitrary time in 2005.
|
|
||||||
return round(sign * order + seconds / 45000, 7)
|
|
||||||
|
|
||||||
|
|
||||||
# used for ranking comments
|
|
||||||
def _confidence(ups, downs):
|
|
||||||
n = ups + downs
|
|
||||||
|
|
||||||
if n == 0:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
z = 1.281551565545
|
|
||||||
p = float(ups) / n
|
|
||||||
|
|
||||||
left = p + 1 / (2 * n) * z * z
|
|
||||||
right = z * sqrt(p * (1 - p) / n + z * z / (4 * n * n))
|
|
||||||
under = 1 + 1 / n * z * z
|
|
||||||
|
|
||||||
return (left - right) / under
|
|
||||||
|
|
||||||
|
|
||||||
def confidence(ups, downs):
|
|
||||||
if ups + downs == 0:
|
|
||||||
return 0
|
|
||||||
else:
|
|
||||||
return _confidence(ups, downs)
|
|
27
app/utils.py
27
app/utils.py
|
@ -451,6 +451,7 @@ def reply_already_exists(user_id, post_id, parent_id, body) -> bool:
|
||||||
def reply_is_just_link_to_gif_reaction(body) -> bool:
|
def reply_is_just_link_to_gif_reaction(body) -> bool:
|
||||||
tmp_body = body.strip()
|
tmp_body = body.strip()
|
||||||
if tmp_body.startswith('https://media.tenor.com/') or \
|
if tmp_body.startswith('https://media.tenor.com/') or \
|
||||||
|
tmp_body.startswith('https://i.giphy.com/') or \
|
||||||
tmp_body.startswith('https://media1.giphy.com/') or \
|
tmp_body.startswith('https://media1.giphy.com/') or \
|
||||||
tmp_body.startswith('https://media2.giphy.com/') or \
|
tmp_body.startswith('https://media2.giphy.com/') or \
|
||||||
tmp_body.startswith('https://media3.giphy.com/') or \
|
tmp_body.startswith('https://media3.giphy.com/') or \
|
||||||
|
@ -480,9 +481,9 @@ def awaken_dormant_instance(instance):
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
|
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
|
||||||
epoch = datetime(1970, 1, 1)
|
epoch = datetime(1970, 1, 1)
|
||||||
|
|
||||||
|
|
||||||
def epoch_seconds(date):
|
def epoch_seconds(date):
|
||||||
td = date - epoch
|
td = date - epoch
|
||||||
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
|
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
|
||||||
|
@ -497,3 +498,27 @@ def post_ranking(score, date: datetime):
|
||||||
sign = 1 if score > 0 else -1 if score < 0 else 0
|
sign = 1 if score > 0 else -1 if score < 0 else 0
|
||||||
seconds = epoch_seconds(date) - 1685766018
|
seconds = epoch_seconds(date) - 1685766018
|
||||||
return round(sign * order + seconds / 45000, 7)
|
return round(sign * order + seconds / 45000, 7)
|
||||||
|
|
||||||
|
|
||||||
|
# used for ranking comments
|
||||||
|
def _confidence(ups, downs):
    """Return the lower bound of the Wilson score interval for a vote tally.

    This is the pure math helper behind ``confidence``. It expects
    non-negative integer counts; callers should go through ``confidence``,
    which sanitises its inputs first.

    :param ups: number of up votes (>= 0)
    :param downs: number of down votes (>= 0)
    :return: a float; 0.0 when there are no votes at all
    """
    n = ups + downs

    if n == 0:
        return 0.0

    # 0.90 quantile of the standard normal distribution.
    z = 1.281551565545
    # Observed fraction of votes that are up votes.
    p = float(ups) / n

    left = p + 1 / (2 * n) * z * z
    right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
    under = 1 + 1 / n * z * z

    return (left - right) / under


def confidence(ups, downs) -> float:
    """Compute the ranking value for a comment from its vote counts.

    Counts that are ``None`` or negative are treated as 0 before the score is
    computed. Without this, a ``None`` counter raises ``TypeError`` and a
    negative counter can make the square-root argument in ``_confidence``
    negative, raising ``ValueError`` in the middle of handling a vote.

    :param ups: number of up votes; may be None or negative, treated as 0
    :param downs: number of down votes; may be None or negative, treated as 0
    :return: 0.0 when there are no (valid) votes, otherwise the Wilson score
        lower bound from ``_confidence``
    """
    # `x or 0` maps None to 0; max(..., 0) discards negative tallies.
    ups = max(ups or 0, 0)
    downs = max(downs or 0, 0)

    if ups + downs == 0:
        return 0.0
    return _confidence(ups, downs)
|
||||||
|
|
38
migrations/versions/5b4a967f9988_comment_ranking_float.py
Normal file
38
migrations/versions/5b4a967f9988_comment_ranking_float.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
"""comment ranking float
|
||||||
|
|
||||||
|
Revision ID: 5b4a967f9988
|
||||||
|
Revises: dc49309fc13e
|
||||||
|
Create Date: 2024-01-07 21:33:02.694552
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '5b4a967f9988'
|
||||||
|
down_revision = 'dc49309fc13e'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table('post_reply', schema=None) as batch_op:
|
||||||
|
batch_op.alter_column('ranking',
|
||||||
|
existing_type=sa.INTEGER(),
|
||||||
|
type_=sa.Float(),
|
||||||
|
existing_nullable=True)
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table('post_reply', schema=None) as batch_op:
|
||||||
|
batch_op.alter_column('ranking',
|
||||||
|
existing_type=sa.Float(),
|
||||||
|
type_=sa.INTEGER(),
|
||||||
|
existing_nullable=True)
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
Loading…
Add table
Reference in a new issue