Merge branch 'refactor-post-new'

This commit is contained in:
rimu 2024-10-21 09:56:20 +13:00
commit 42439fb90a
6 changed files with 351 additions and 307 deletions

View file

@ -25,12 +25,12 @@ from io import BytesIO
import pytesseract
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \
shorten_string, remove_tracking_from_link, \
microblog_content_to_title, generate_image_from_video_url, is_video_url, \
notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \
moderating_communities, is_video_hosting_site
html_to_text, add_to_modlog_activitypub, joined_communities, \
moderating_communities
from sqlalchemy import or_
@ -1625,222 +1625,12 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
activity_log.exception_message = 'Community is local only, post discarded'
activity_log.result = 'ignored'
return None
microblog = False
if 'name' not in request_json['object']: # Microblog posts
if 'content' in request_json['object'] and request_json['object']['content'] is not None:
title = "[Microblog]"
microblog = True
else:
try:
post = Post.new(user, community, request_json, announce_id)
except Exception as ex:
activity_log.exception_message = str(ex)
return None
else:
title = request_json['object']['name'].strip()
nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper()
post = Post(user_id=user.id, community_id=community.id,
title=html.unescape(title),
comments_enabled=request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True,
sticky=request_json['object']['stickied'] if 'stickied' in request_json['object'] else False,
nsfw=request_json['object']['sensitive'] if 'sensitive' in request_json['object'] else False,
nsfl=request_json['object']['nsfl'] if 'nsfl' in request_json['object'] else nsfl_in_title,
ap_id=request_json['object']['id'],
ap_create_id=request_json['id'],
ap_announce_id=announce_id,
type=constants.POST_TYPE_ARTICLE,
up_votes=1,
from_bot=user.bot,
score=instance_weight(user.ap_domain),
instance_id=user.instance_id,
indexable=user.indexable,
microblog=microblog
)
if 'content' in request_json['object'] and request_json['object']['content'] is not None:
if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
post.body_html = allowlist_html(request_json['object']['content'])
if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown':
post.body = request_json['object']['source']['content']
post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML
else:
post.body = html_to_text(post.body_html)
elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown':
post.body = request_json['object']['content']
post.body_html = markdown_to_html(post.body)
else:
if not (request_json['object']['content'].startswith('<p>') or request_json['object']['content'].startswith('<blockquote>')):
request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
post.body_html = allowlist_html(request_json['object']['content'])
post.body = html_to_text(post.body_html)
if microblog:
autogenerated_title = microblog_content_to_title(post.body_html)
if len(autogenerated_title) < 20:
title = '[Microblog] ' + autogenerated_title.strip()
else:
title = autogenerated_title.strip()
if '[NSFL]' in title.upper() or '(NSFL)' in title.upper():
post.nsfl = True
if '[NSFW]' in title.upper() or '(NSFW)' in title.upper():
post.nsfw = True
post.title = title
# Discard post if it contains certain phrases. Good for stopping spam floods.
blocked_phrases_list = blocked_phrases()
for blocked_phrase in blocked_phrases_list:
if blocked_phrase in post.title:
return None
if post.body:
for blocked_phrase in blocked_phrases_list:
if blocked_phrase in post.body:
return None
if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
'type' in request_json['object']['attachment'][0]:
alt_text = None
if request_json['object']['attachment'][0]['type'] == 'Link':
post.url = request_json['object']['attachment'][0]['href'] # Lemmy < 0.19.4
if request_json['object']['attachment'][0]['type'] == 'Document':
post.url = request_json['object']['attachment'][0]['url'] # Mastodon
if 'name' in request_json['object']['attachment'][0]:
alt_text = request_json['object']['attachment'][0]['name']
if request_json['object']['attachment'][0]['type'] == 'Image':
post.url = request_json['object']['attachment'][0]['url'] # PixelFed, PieFed, Lemmy >= 0.19.4
if 'name' in request_json['object']['attachment'][0]:
alt_text = request_json['object']['attachment'][0]['name']
if post.url:
if is_image_url(post.url):
post.type = POST_TYPE_IMAGE
if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
image = File(source_url=request_json['object']['image']['url'])
else:
image = File(source_url=post.url)
if alt_text:
image.alt_text = alt_text
db.session.add(image)
post.image = image
elif is_video_url(post.url): # youtube is detected later
post.type = POST_TYPE_VIDEO
image = File(source_url=post.url)
db.session.add(image)
post.image = image
else:
post.type = POST_TYPE_LINK
domain = domain_from_url(post.url)
# notify about links to banned websites.
already_notified = set() # often admins and mods are the same people - avoid notifying them twice
if domain.notify_mods:
for community_member in post.community.moderators():
notify = Notification(title='Suspicious content', url=post.ap_id,
user_id=community_member.user_id,
author_id=user.id)
db.session.add(notify)
already_notified.add(community_member.user_id)
if domain.notify_admins:
for admin in Site.admins():
if admin.id not in already_notified:
notify = Notification(title='Suspicious content',
url=post.ap_id, user_id=admin.id,
author_id=user.id)
db.session.add(notify)
if domain.banned or domain.name.endswith('.pages.dev'):
post = None
activity_log.exception_message = domain.name + ' is blocked by admin'
else:
domain.post_count += 1
post.domain = domain
if post is not None:
if request_json['object']['type'] == 'Video':
post.type = POST_TYPE_VIDEO
post.url = request_json['object']['id']
if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list):
icon = File(source_url=request_json['object']['icon'][-1]['url'])
db.session.add(icon)
post.image = icon
# Language. Lemmy uses 'language' while Mastodon has 'contentMap'
if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
language = find_language_or_create(request_json['object']['language']['identifier'],
request_json['object']['language']['name'])
post.language_id = language.id
elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
language = find_language(next(iter(request_json['object']['contentMap'])))
post.language_id = language.id if language else None
if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
for json_tag in request_json['object']['tag']:
if json_tag and json_tag['type'] == 'Hashtag':
if json_tag['name'][1:].lower() != community.name.lower(): # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
hashtag = find_hashtag_or_create(json_tag['name'])
if hashtag:
post.tags.append(hashtag)
if 'image' in request_json['object'] and post.image is None:
image = File(source_url=request_json['object']['image']['url'])
db.session.add(image)
post.image = image
if post.image is None and post.type == POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image
# Let's see if we can do better than the source instance did!
tn_url = post.url
if tn_url[:32] == 'https://www.youtube.com/watch?v=':
tn_url = 'https://youtu.be/' + tn_url[32:43] # better chance of thumbnail from youtu.be than youtube.com
opengraph = opengraph_parse(tn_url)
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
if not filename.startswith('/'):
file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
post.image = file
db.session.add(file)
if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public':
post.indexable = False
if post.url:
post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com
if is_video_hosting_site(post.url):
post.type = POST_TYPE_VIDEO
db.session.add(post)
post.ranking = post_ranking(post.score, post.posted_at)
community.post_count += 1
community.last_active = utcnow()
activity_log.result = 'success'
user.post_count += 1
db.session.commit()
# Polls need to be processed quite late because they need a post_id to refer to
if request_json['object']['type'] == 'Question':
post.type = POST_TYPE_POLL
mode = 'single'
if 'anyOf' in request_json['object']:
mode = 'multiple'
poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False)
db.session.add(poll)
i = 1
for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']:
new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i)
db.session.add(new_choice)
i += 1
db.session.commit()
if post.image_id:
make_image_sizes(post.image_id, 170, 512, 'posts', community.low_quality) # the 512 sized image is for masonry view
# Update list of cross posts
if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
Post.posted_at > post.posted_at - timedelta(days=6)).all()
for op in other_posts:
if op.cross_posts is None:
op.cross_posts = [post.id]
else:
op.cross_posts.append(post.id)
if post.cross_posts is None:
post.cross_posts = [op.id]
else:
post.cross_posts.append(op.id)
db.session.commit()
if post.community_id not in communities_banned_from(user.id):
notify_about_post(post)
if user.reputation > 100:
post.up_votes += 1
post.score += 1
post.ranking = post_ranking(post.score, post.posted_at)
db.session.commit()
return post

View file

@ -1,12 +1,16 @@
import base64
import os
from collections import namedtuple
from io import BytesIO
from random import randint
import flask
from PIL import Image, ImageOps
from flask import redirect, url_for, flash, request, make_response, session, Markup, current_app, abort, g, json, \
jsonify
from flask_login import current_user, login_required
from flask_babel import _
from pillow_heif import register_heif_opener
from slugify import slugify
from sqlalchemy import or_, desc, text
@ -21,7 +25,8 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre
EditCommunityWikiPageForm
from app.community.util import search_for_community, actor_to_community, \
save_post, save_icon_file, save_banner_file, send_to_remote_instance, \
delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users
delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string, \
allowed_extensions, end_poll_date
from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \
SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \
REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS
@ -38,7 +43,7 @@ from app.utils import get_setting, render_template, allowlist_html, markdown_to_
joined_communities, moderating_communities, blocked_domains, mimetype_from_url, blocked_instances, \
community_moderators, communities_banned_from, show_ban_message, recently_upvoted_posts, recently_downvoted_posts, \
blocked_users, post_ranking, languages_for_form, english_language_id, menu_topics, add_to_modlog, \
blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown
blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown, ensure_directory_exists
from feedgen.feed import FeedGenerator
from datetime import timezone, timedelta
from copy import copy
@ -586,8 +591,8 @@ def add_post(actor, type):
return show_ban_message()
community = actor_to_community(actor)
if type == 'discussion':
post_type = POST_TYPE_ARTICLE
if type == 'discussion':
form = CreateDiscussionForm()
elif type == 'link':
post_type = POST_TYPE_LINK
@ -628,37 +633,84 @@ def add_post(actor, type):
community = Community.query.get_or_404(form.communities.data)
if not can_create_post(current_user, community):
abort(401)
post = Post(user_id=current_user.id, community_id=form.communities.data, instance_id=1)
save_post(form, post, post_type)
community.post_count += 1
current_user.post_count += 1
community.last_active = g.site.last_active = utcnow()
db.session.commit()
language = Language.query.get(form.language_id.data)
request_json = {
'id': None,
'object': {
'name': form.title.data,
'type': 'Page',
'sticky': form.sticky.data,
'nsfw': form.nsfw.data,
'nsfl': form.nsfl.data,
'id': gibberish(), # this will be updated once we have the post.id
'mediaType': 'text/markdown',
'content': form.body.data,
'tag': tags_from_string(form.tags.data),
'language': {'identifier': language.code, 'name': language.name}
}
}
if type == 'link':
request_json['object']['attachment'] = [{'type': 'Link', 'href': form.link_url.data}]
elif type == 'image':
uploaded_file = request.files['image_file']
if uploaded_file and uploaded_file.filename != '':
# check if this is an allowed type of file
file_ext = os.path.splitext(uploaded_file.filename)[1]
if file_ext.lower() not in allowed_extensions:
abort(400, description="Invalid image type.")
new_filename = gibberish(15)
# set up the storage directory
directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
final_place = os.path.join(directory, new_filename + file_ext)
uploaded_file.seek(0)
uploaded_file.save(final_place)
if file_ext.lower() == '.heic':
register_heif_opener()
Image.MAX_IMAGE_PIXELS = 89478485
# resize if necessary
img = Image.open(final_place)
if '.' + img.format.lower() in allowed_extensions:
img = ImageOps.exif_transpose(img)
# limit full sized version to 2000px
img.thumbnail((2000, 2000))
img.save(final_place)
request_json['object']['attachment'] = [{'type': 'Image', 'url': f'https://{current_app.config["SERVER_NAME"]}/{final_place.replace("app/", "")}',
'name': form.image_alt_text.data}]
elif type == 'video':
request_json['object']['attachment'] = [{'type': 'Document', 'url': form.video_url.data}]
elif type == 'poll':
request_json['object']['type'] = 'Question'
choices = [form.choice_1, form.choice_2, form.choice_3, form.choice_4, form.choice_5,
form.choice_6, form.choice_7, form.choice_8, form.choice_9, form.choice_10]
key = 'oneOf' if form.mode.data == 'single' else 'anyOf'
request_json['object'][key] = []
for choice in choices:
choice_data = choice.data.strip()
if choice_data:
request_json['object'][key].append({'name': choice_data})
request_json['object']['endTime'] = end_poll_date(form.finish_in.data)
# todo: add try..except
post = Post.new(current_user, community, request_json)
current_user.language_id = form.language_id.data
g.site.last_active = utcnow()
post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}"
db.session.commit()
if post.image_id and post.image.file_path is None:
make_image_sizes(post.image_id, 170, 512, 'posts') # the 512 sized image is for masonry view
# Update list of cross posts
if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
Post.posted_at > post.posted_at - timedelta(days=6)).all()
for op in other_posts:
if op.cross_posts is None:
op.cross_posts = [post.id]
else:
op.cross_posts.append(post.id)
if post.cross_posts is None:
post.cross_posts = [op.id]
else:
post.cross_posts.append(op.id)
db.session.commit()
upvote_own_post(post)
notify_about_post(post)
if post_type == POST_TYPE_POLL:
if post.type == POST_TYPE_POLL:
poll = Poll.query.filter_by(post_id=post.id).first()
if not poll.local_only:
federate_post_to_user_followers(post)
@ -670,7 +722,7 @@ def add_post(actor, type):
federate_post(community, post)
return redirect(f"/post/{post.id}")
else:
else: # GET
form.communities.data = community.id
form.notify_author.data = True
if post_type == POST_TYPE_POLL:
@ -1939,7 +1991,7 @@ def check_url_already_posted():
def upvote_own_post(post):
post.score = 1
post.up_votes = 1
post.ranking = post_ranking(post.score, utcnow())
post.ranking = post.post_ranking(post.score, utcnow())
vote = PostVote(user_id=current_user.id, post_id=post.id, author_id=current_user.id, effect=1)
db.session.add(vote)
db.session.commit()

View file

@ -484,7 +484,7 @@ def end_poll_date(end_choice):
raise ValueError("Invalid choice")
def tags_from_string(tags: str) -> List[Tag]:
def tags_from_string(tags: str) -> List[dict]:
return_value = []
tags = tags.strip()
if tags == '':
@ -496,7 +496,7 @@ def tags_from_string(tags: str) -> List[Tag]:
tag = tag[1:]
tag_to_append = find_hashtag_or_create(tag)
if tag_to_append:
return_value.append(tag_to_append)
return_value.append({'type': 'Hashtag', 'name': tag_to_append.name})
return return_value

View file

@ -1,6 +1,7 @@
import html
from datetime import datetime, timedelta, date, timezone
from time import time
from typing import List, Union
from typing import List, Union, Type
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
import arrow
@ -15,7 +16,7 @@ from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.ext.mutable import MutableList
from flask_sqlalchemy import BaseQuery
from sqlalchemy_searchable import SearchQueryMixin
from app import db, login, cache, celery, httpx_client
from app import db, login, cache, celery, httpx_client, constants
import jwt
import os
import math
@ -1074,7 +1075,7 @@ class Post(db.Model):
url = db.Column(db.String(2048))
body = db.Column(db.Text)
body_html = db.Column(db.Text)
type = db.Column(db.Integer)
type = db.Column(db.Integer, default=constants.POST_TYPE_ARTICLE)
microblog = db.Column(db.Boolean, default=False)
comments_enabled = db.Column(db.Boolean, default=True)
deleted = db.Column(db.Boolean, default=False, index=True)
@ -1127,6 +1128,243 @@ class Post(db.Model):
def get_by_ap_id(cls, ap_id):
return cls.query.filter_by(ap_id=ap_id).first()
@classmethod
def new(cls, user: User, community: Community, request_json: dict, announce_id=None):
from app.activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \
make_image_sizes, notify_about_post
from app.utils import allowlist_html, markdown_to_html, html_to_text, microblog_content_to_title, blocked_phrases, \
is_image_url, is_video_url, domain_from_url, opengraph_parse, shorten_string, remove_tracking_from_link, \
is_video_hosting_site, post_ranking, communities_banned_from
microblog = False
if 'name' not in request_json['object']: # Microblog posts
if 'content' in request_json['object'] and request_json['object']['content'] is not None:
title = "[Microblog]"
microblog = True
else:
return None
else:
title = request_json['object']['name'].strip()
nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper()
post = Post(user_id=user.id, community_id=community.id,
title=html.unescape(title),
comments_enabled=request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True,
sticky=request_json['object']['stickied'] if 'stickied' in request_json['object'] else False,
nsfw=request_json['object']['sensitive'] if 'sensitive' in request_json['object'] else False,
nsfl=request_json['object']['nsfl'] if 'nsfl' in request_json['object'] else nsfl_in_title,
ap_id=request_json['object']['id'],
ap_create_id=request_json['id'],
ap_announce_id=announce_id,
up_votes=1,
from_bot=user.bot,
score=instance_weight(user.ap_domain),
instance_id=user.instance_id,
indexable=user.indexable,
microblog=microblog,
posted_at=utcnow()
)
if 'content' in request_json['object'] and request_json['object']['content'] is not None:
if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
post.body_html = allowlist_html(request_json['object']['content'])
if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and \
request_json['object']['source']['mediaType'] == 'text/markdown':
post.body = request_json['object']['source']['content']
post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML
else:
post.body = html_to_text(post.body_html)
elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown':
post.body = request_json['object']['content']
post.body_html = markdown_to_html(post.body)
else:
if not (request_json['object']['content'].startswith('<p>') or request_json['object']['content'].startswith('<blockquote>')):
request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
post.body_html = allowlist_html(request_json['object']['content'])
post.body = html_to_text(post.body_html)
if microblog:
autogenerated_title = microblog_content_to_title(post.body_html)
if len(autogenerated_title) < 20:
title = '[Microblog] ' + autogenerated_title.strip()
else:
title = autogenerated_title.strip()
if '[NSFL]' in title.upper() or '(NSFL)' in title.upper():
post.nsfl = True
if '[NSFW]' in title.upper() or '(NSFW)' in title.upper():
post.nsfw = True
post.title = title
# Discard post if it contains certain phrases. Good for stopping spam floods.
blocked_phrases_list = blocked_phrases()
for blocked_phrase in blocked_phrases_list:
if blocked_phrase in post.title:
return None
if post.body:
for blocked_phrase in blocked_phrases_list:
if blocked_phrase in post.body:
return None
if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
'type' in request_json['object']['attachment'][0]:
alt_text = None
if request_json['object']['attachment'][0]['type'] == 'Link':
post.url = request_json['object']['attachment'][0]['href'] # Lemmy < 0.19.4
if request_json['object']['attachment'][0]['type'] == 'Document':
post.url = request_json['object']['attachment'][0]['url'] # Mastodon
if 'name' in request_json['object']['attachment'][0]:
alt_text = request_json['object']['attachment'][0]['name']
if request_json['object']['attachment'][0]['type'] == 'Image':
post.url = request_json['object']['attachment'][0]['url'] # PixelFed, PieFed, Lemmy >= 0.19.4
if 'name' in request_json['object']['attachment'][0]:
alt_text = request_json['object']['attachment'][0]['name']
if post.url:
if is_image_url(post.url):
post.type = constants.POST_TYPE_IMAGE
if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
image = File(source_url=request_json['object']['image']['url'])
else:
image = File(source_url=post.url)
if alt_text:
image.alt_text = alt_text
db.session.add(image)
post.image = image
elif is_video_url(post.url): # youtube is detected later
post.type = constants.POST_TYPE_VIDEO
image = File(source_url=post.url)
db.session.add(image)
post.image = image
else:
post.type = constants.POST_TYPE_LINK
domain = domain_from_url(post.url)
# notify about links to banned websites.
already_notified = set() # often admins and mods are the same people - avoid notifying them twice
if domain.notify_mods:
for community_member in post.community.moderators():
notify = Notification(title='Suspicious content', url=post.ap_id,
user_id=community_member.user_id,
author_id=user.id)
db.session.add(notify)
already_notified.add(community_member.user_id)
if domain.notify_admins:
for admin in Site.admins():
if admin.id not in already_notified:
notify = Notification(title='Suspicious content',
url=post.ap_id, user_id=admin.id,
author_id=user.id)
db.session.add(notify)
if domain.banned or domain.name.endswith('.pages.dev'):
raise Exception(domain.name + ' is blocked by admin')
else:
domain.post_count += 1
post.domain = domain
if post is not None:
if request_json['object']['type'] == 'Video':
post.type = constants.POST_TYPE_VIDEO
post.url = request_json['object']['id']
if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list):
icon = File(source_url=request_json['object']['icon'][-1]['url'])
db.session.add(icon)
post.image = icon
# Language. Lemmy uses 'language' while Mastodon has 'contentMap'
if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
language = find_language_or_create(request_json['object']['language']['identifier'],
request_json['object']['language']['name'])
post.language_id = language.id
elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
language = find_language(next(iter(request_json['object']['contentMap'])))
post.language_id = language.id if language else None
if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
for json_tag in request_json['object']['tag']:
if json_tag and json_tag['type'] == 'Hashtag':
if json_tag['name'][1:].lower() != community.name.lower(): # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
hashtag = find_hashtag_or_create(json_tag['name'])
if hashtag:
post.tags.append(hashtag)
if 'image' in request_json['object'] and post.image is None:
image = File(source_url=request_json['object']['image']['url'])
db.session.add(image)
post.image = image
if post.image is None and post.type == constants.POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image
# Let's see if we can do better than the source instance did!
tn_url = post.url
if tn_url[:32] == 'https://www.youtube.com/watch?v=':
tn_url = 'https://youtu.be/' + tn_url[
32:43] # better chance of thumbnail from youtu.be than youtube.com
opengraph = opengraph_parse(tn_url)
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
if not filename.startswith('/'):
file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
post.image = file
db.session.add(file)
if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public':
post.indexable = False
if post.url:
post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com
if is_video_hosting_site(post.url):
post.type = constants.POST_TYPE_VIDEO
db.session.add(post)
post.ranking = post.post_ranking(post.score, post.posted_at)
community.post_count += 1
community.last_active = utcnow()
user.post_count += 1
db.session.commit()
# Polls need to be processed quite late because they need a post_id to refer to
if request_json['object']['type'] == 'Question':
post.type = constants.POST_TYPE_POLL
mode = 'single'
if 'anyOf' in request_json['object']:
mode = 'multiple'
poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False)
db.session.add(poll)
i = 1
for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']:
new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i)
db.session.add(new_choice)
i += 1
db.session.commit()
if post.image_id:
make_image_sizes(post.image_id, 170, 512, 'posts',
community.low_quality) # the 512 sized image is for masonry view
# Update list of cross posts
if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
Post.posted_at > post.posted_at - timedelta(days=6)).all()
for op in other_posts:
if op.cross_posts is None:
op.cross_posts = [post.id]
else:
op.cross_posts.append(post.id)
if post.cross_posts is None:
post.cross_posts = [op.id]
else:
post.cross_posts.append(op.id)
db.session.commit()
if post.community_id not in communities_banned_from(user.id):
notify_about_post(post)
if user.reputation > 100:
post.up_votes += 1
post.score += 1
post.ranking = post.post_ranking(post.score, post.posted_at)
db.session.commit()
return post
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
epoch = datetime(1970, 1, 1)
@classmethod
def epoch_seconds(self, date):
td = date - self.epoch
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
def delete_dependencies(self):
db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete()
db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete()
@ -1420,7 +1658,7 @@ class PostReply(db.Model):
raise Exception('Gif comment ignored')
if reply_is_stupid(reply.body):
raise Exception('Stupid reply')
raise Exception('Low quality reply')
db.session.add(reply)
db.session.commit()

View file

@ -28,7 +28,7 @@ from app.post import bp
from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
shorten_string, markdown_to_text, gibberish, ap_datetime, return_304, \
request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, moderating_communities, joined_communities, \
reply_already_exists, reply_is_just_link_to_gif_reaction, moderating_communities, joined_communities, \
blocked_instances, blocked_domains, community_moderators, blocked_phrases, show_ban_message, recently_upvoted_posts, \
recently_downvoted_posts, recently_upvoted_post_replies, recently_downvoted_post_replies, reply_is_stupid, \
languages_for_form, menu_topics, add_to_modlog, blocked_communities, piefed_markdown_to_lemmy_markdown, \
@ -546,11 +546,18 @@ def add_reply(post_id: int, comment_id: int):
current_user.ip_address = ip_address()
current_user.language_id = form.language_id.data
try:
reply = PostReply.new(current_user, post, in_reply_to,
body=piefed_markdown_to_lemmy_markdown(form.body.data),
body_html=markdown_to_html(form.body.data),
notify_author=form.notify_author.data,
language_id=form.language_id.data)
except Exception as ex:
flash(_('Your reply was not accepted because %(reason)s', reason=str(ex)), 'error')
if in_reply_to.depth <= constants.THREAD_CUTOFF_DEPTH:
return redirect(url_for('activitypub.post_ap', post_id=post_id, _anchor=f'comment_{in_reply_to.id}'))
else:
return redirect(url_for('post.continue_discussion', post_id=post_id, comment_id=in_reply_to.parent_id))
form.body.data = ''
flash('Your comment has been added.')

View file

@ -898,51 +898,8 @@ def topic_tree() -> List:
return [topic for topic in topics_dict.values() if topic['topic'].parent_id is None]
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
epoch = datetime(1970, 1, 1)
def epoch_seconds(date):
td = date - epoch
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
def post_ranking(score, date: datetime):
if date is None:
date = datetime.utcnow()
if score is None:
score = 1
order = math.log(max(abs(score), 1), 10)
sign = 1 if score > 0 else -1 if score < 0 else 0
seconds = epoch_seconds(date) - 1685766018
return round(sign * order + seconds / 45000, 7)
# used for ranking comments
def _confidence(ups, downs):
n = ups + downs
if n == 0:
return 0.0
z = 1.281551565545
p = float(ups) / n
left = p + 1 / (2 * n) * z * z
right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
under = 1 + 1 / n * z * z
return (left - right) / under
def confidence(ups, downs) -> float:
if ups is None or ups < 0:
ups = 0
if downs is None or downs < 0:
downs = 0
if ups + downs == 0:
return 0.0
else:
return _confidence(ups, downs)
return Post.post_ranking(score, date)
def opengraph_parse(url):