Merge branch 'refactor-post-new'

This commit is contained in:
rimu 2024-10-21 09:56:20 +13:00
commit 42439fb90a
6 changed files with 351 additions and 307 deletions

View file

@ -25,12 +25,12 @@ from io import BytesIO
import pytesseract import pytesseract
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \ shorten_string, remove_tracking_from_link, \
blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \ microblog_content_to_title, generate_image_from_video_url, is_video_url, \
notification_subscribers, communities_banned_from, actor_contains_blocked_words, \ notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \ html_to_text, add_to_modlog_activitypub, joined_communities, \
moderating_communities, is_video_hosting_site moderating_communities
from sqlalchemy import or_ from sqlalchemy import or_
@ -1625,222 +1625,12 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
activity_log.exception_message = 'Community is local only, post discarded' activity_log.exception_message = 'Community is local only, post discarded'
activity_log.result = 'ignored' activity_log.result = 'ignored'
return None return None
microblog = False try:
if 'name' not in request_json['object']: # Microblog posts post = Post.new(user, community, request_json, announce_id)
if 'content' in request_json['object'] and request_json['object']['content'] is not None: except Exception as ex:
title = "[Microblog]" activity_log.exception_message = str(ex)
microblog = True
else:
return None return None
else:
title = request_json['object']['name'].strip()
nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper()
post = Post(user_id=user.id, community_id=community.id,
title=html.unescape(title),
comments_enabled=request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True,
sticky=request_json['object']['stickied'] if 'stickied' in request_json['object'] else False,
nsfw=request_json['object']['sensitive'] if 'sensitive' in request_json['object'] else False,
nsfl=request_json['object']['nsfl'] if 'nsfl' in request_json['object'] else nsfl_in_title,
ap_id=request_json['object']['id'],
ap_create_id=request_json['id'],
ap_announce_id=announce_id,
type=constants.POST_TYPE_ARTICLE,
up_votes=1,
from_bot=user.bot,
score=instance_weight(user.ap_domain),
instance_id=user.instance_id,
indexable=user.indexable,
microblog=microblog
)
if 'content' in request_json['object'] and request_json['object']['content'] is not None:
if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
post.body_html = allowlist_html(request_json['object']['content'])
if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown':
post.body = request_json['object']['source']['content']
post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML
else:
post.body = html_to_text(post.body_html)
elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown':
post.body = request_json['object']['content']
post.body_html = markdown_to_html(post.body)
else:
if not (request_json['object']['content'].startswith('<p>') or request_json['object']['content'].startswith('<blockquote>')):
request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
post.body_html = allowlist_html(request_json['object']['content'])
post.body = html_to_text(post.body_html)
if microblog:
autogenerated_title = microblog_content_to_title(post.body_html)
if len(autogenerated_title) < 20:
title = '[Microblog] ' + autogenerated_title.strip()
else:
title = autogenerated_title.strip()
if '[NSFL]' in title.upper() or '(NSFL)' in title.upper():
post.nsfl = True
if '[NSFW]' in title.upper() or '(NSFW)' in title.upper():
post.nsfw = True
post.title = title
# Discard post if it contains certain phrases. Good for stopping spam floods.
blocked_phrases_list = blocked_phrases()
for blocked_phrase in blocked_phrases_list:
if blocked_phrase in post.title:
return None
if post.body:
for blocked_phrase in blocked_phrases_list:
if blocked_phrase in post.body:
return None
if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
'type' in request_json['object']['attachment'][0]:
alt_text = None
if request_json['object']['attachment'][0]['type'] == 'Link':
post.url = request_json['object']['attachment'][0]['href'] # Lemmy < 0.19.4
if request_json['object']['attachment'][0]['type'] == 'Document':
post.url = request_json['object']['attachment'][0]['url'] # Mastodon
if 'name' in request_json['object']['attachment'][0]:
alt_text = request_json['object']['attachment'][0]['name']
if request_json['object']['attachment'][0]['type'] == 'Image':
post.url = request_json['object']['attachment'][0]['url'] # PixelFed, PieFed, Lemmy >= 0.19.4
if 'name' in request_json['object']['attachment'][0]:
alt_text = request_json['object']['attachment'][0]['name']
if post.url:
if is_image_url(post.url):
post.type = POST_TYPE_IMAGE
if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
image = File(source_url=request_json['object']['image']['url'])
else:
image = File(source_url=post.url)
if alt_text:
image.alt_text = alt_text
db.session.add(image)
post.image = image
elif is_video_url(post.url): # youtube is detected later
post.type = POST_TYPE_VIDEO
image = File(source_url=post.url)
db.session.add(image)
post.image = image
else:
post.type = POST_TYPE_LINK
domain = domain_from_url(post.url)
# notify about links to banned websites.
already_notified = set() # often admins and mods are the same people - avoid notifying them twice
if domain.notify_mods:
for community_member in post.community.moderators():
notify = Notification(title='Suspicious content', url=post.ap_id,
user_id=community_member.user_id,
author_id=user.id)
db.session.add(notify)
already_notified.add(community_member.user_id)
if domain.notify_admins:
for admin in Site.admins():
if admin.id not in already_notified:
notify = Notification(title='Suspicious content',
url=post.ap_id, user_id=admin.id,
author_id=user.id)
db.session.add(notify)
if domain.banned or domain.name.endswith('.pages.dev'):
post = None
activity_log.exception_message = domain.name + ' is blocked by admin'
else:
domain.post_count += 1
post.domain = domain
if post is not None:
if request_json['object']['type'] == 'Video':
post.type = POST_TYPE_VIDEO
post.url = request_json['object']['id']
if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list):
icon = File(source_url=request_json['object']['icon'][-1]['url'])
db.session.add(icon)
post.image = icon
# Language. Lemmy uses 'language' while Mastodon has 'contentMap'
if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
language = find_language_or_create(request_json['object']['language']['identifier'],
request_json['object']['language']['name'])
post.language_id = language.id
elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
language = find_language(next(iter(request_json['object']['contentMap'])))
post.language_id = language.id if language else None
if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
for json_tag in request_json['object']['tag']:
if json_tag and json_tag['type'] == 'Hashtag':
if json_tag['name'][1:].lower() != community.name.lower(): # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
hashtag = find_hashtag_or_create(json_tag['name'])
if hashtag:
post.tags.append(hashtag)
if 'image' in request_json['object'] and post.image is None:
image = File(source_url=request_json['object']['image']['url'])
db.session.add(image)
post.image = image
if post.image is None and post.type == POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image
# Let's see if we can do better than the source instance did!
tn_url = post.url
if tn_url[:32] == 'https://www.youtube.com/watch?v=':
tn_url = 'https://youtu.be/' + tn_url[32:43] # better chance of thumbnail from youtu.be than youtube.com
opengraph = opengraph_parse(tn_url)
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
if not filename.startswith('/'):
file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
post.image = file
db.session.add(file)
if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public':
post.indexable = False
if post.url:
post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com
if is_video_hosting_site(post.url):
post.type = POST_TYPE_VIDEO
db.session.add(post)
post.ranking = post_ranking(post.score, post.posted_at)
community.post_count += 1
community.last_active = utcnow()
activity_log.result = 'success'
user.post_count += 1
db.session.commit()
# Polls need to be processed quite late because they need a post_id to refer to
if request_json['object']['type'] == 'Question':
post.type = POST_TYPE_POLL
mode = 'single'
if 'anyOf' in request_json['object']:
mode = 'multiple'
poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False)
db.session.add(poll)
i = 1
for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']:
new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i)
db.session.add(new_choice)
i += 1
db.session.commit()
if post.image_id:
make_image_sizes(post.image_id, 170, 512, 'posts', community.low_quality) # the 512 sized image is for masonry view
# Update list of cross posts
if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
Post.posted_at > post.posted_at - timedelta(days=6)).all()
for op in other_posts:
if op.cross_posts is None:
op.cross_posts = [post.id]
else:
op.cross_posts.append(post.id)
if post.cross_posts is None:
post.cross_posts = [op.id]
else:
post.cross_posts.append(op.id)
db.session.commit()
if post.community_id not in communities_banned_from(user.id):
notify_about_post(post)
if user.reputation > 100:
post.up_votes += 1
post.score += 1
post.ranking = post_ranking(post.score, post.posted_at)
db.session.commit()
return post return post

View file

@ -1,12 +1,16 @@
import base64
import os
from collections import namedtuple from collections import namedtuple
from io import BytesIO from io import BytesIO
from random import randint from random import randint
import flask import flask
from PIL import Image, ImageOps
from flask import redirect, url_for, flash, request, make_response, session, Markup, current_app, abort, g, json, \ from flask import redirect, url_for, flash, request, make_response, session, Markup, current_app, abort, g, json, \
jsonify jsonify
from flask_login import current_user, login_required from flask_login import current_user, login_required
from flask_babel import _ from flask_babel import _
from pillow_heif import register_heif_opener
from slugify import slugify from slugify import slugify
from sqlalchemy import or_, desc, text from sqlalchemy import or_, desc, text
@ -21,7 +25,8 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre
EditCommunityWikiPageForm EditCommunityWikiPageForm
from app.community.util import search_for_community, actor_to_community, \ from app.community.util import search_for_community, actor_to_community, \
save_post, save_icon_file, save_banner_file, send_to_remote_instance, \ save_post, save_icon_file, save_banner_file, send_to_remote_instance, \
delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string, \
allowed_extensions, end_poll_date
from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \ from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \
SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \ SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \
REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS
@ -38,7 +43,7 @@ from app.utils import get_setting, render_template, allowlist_html, markdown_to_
joined_communities, moderating_communities, blocked_domains, mimetype_from_url, blocked_instances, \ joined_communities, moderating_communities, blocked_domains, mimetype_from_url, blocked_instances, \
community_moderators, communities_banned_from, show_ban_message, recently_upvoted_posts, recently_downvoted_posts, \ community_moderators, communities_banned_from, show_ban_message, recently_upvoted_posts, recently_downvoted_posts, \
blocked_users, post_ranking, languages_for_form, english_language_id, menu_topics, add_to_modlog, \ blocked_users, post_ranking, languages_for_form, english_language_id, menu_topics, add_to_modlog, \
blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown, ensure_directory_exists
from feedgen.feed import FeedGenerator from feedgen.feed import FeedGenerator
from datetime import timezone, timedelta from datetime import timezone, timedelta
from copy import copy from copy import copy
@ -586,8 +591,8 @@ def add_post(actor, type):
return show_ban_message() return show_ban_message()
community = actor_to_community(actor) community = actor_to_community(actor)
if type == 'discussion':
post_type = POST_TYPE_ARTICLE post_type = POST_TYPE_ARTICLE
if type == 'discussion':
form = CreateDiscussionForm() form = CreateDiscussionForm()
elif type == 'link': elif type == 'link':
post_type = POST_TYPE_LINK post_type = POST_TYPE_LINK
@ -628,37 +633,84 @@ def add_post(actor, type):
community = Community.query.get_or_404(form.communities.data) community = Community.query.get_or_404(form.communities.data)
if not can_create_post(current_user, community): if not can_create_post(current_user, community):
abort(401) abort(401)
post = Post(user_id=current_user.id, community_id=form.communities.data, instance_id=1)
save_post(form, post, post_type) language = Language.query.get(form.language_id.data)
community.post_count += 1
current_user.post_count += 1 request_json = {
community.last_active = g.site.last_active = utcnow() 'id': None,
db.session.commit() 'object': {
'name': form.title.data,
'type': 'Page',
'sticky': form.sticky.data,
'nsfw': form.nsfw.data,
'nsfl': form.nsfl.data,
'id': gibberish(), # this will be updated once we have the post.id
'mediaType': 'text/markdown',
'content': form.body.data,
'tag': tags_from_string(form.tags.data),
'language': {'identifier': language.code, 'name': language.name}
}
}
if type == 'link':
request_json['object']['attachment'] = [{'type': 'Link', 'href': form.link_url.data}]
elif type == 'image':
uploaded_file = request.files['image_file']
if uploaded_file and uploaded_file.filename != '':
# check if this is an allowed type of file
file_ext = os.path.splitext(uploaded_file.filename)[1]
if file_ext.lower() not in allowed_extensions:
abort(400, description="Invalid image type.")
new_filename = gibberish(15)
# set up the storage directory
directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
final_place = os.path.join(directory, new_filename + file_ext)
uploaded_file.seek(0)
uploaded_file.save(final_place)
if file_ext.lower() == '.heic':
register_heif_opener()
Image.MAX_IMAGE_PIXELS = 89478485
# resize if necessary
img = Image.open(final_place)
if '.' + img.format.lower() in allowed_extensions:
img = ImageOps.exif_transpose(img)
# limit full sized version to 2000px
img.thumbnail((2000, 2000))
img.save(final_place)
request_json['object']['attachment'] = [{'type': 'Image', 'url': f'https://{current_app.config["SERVER_NAME"]}/{final_place.replace("app/", "")}',
'name': form.image_alt_text.data}]
elif type == 'video':
request_json['object']['attachment'] = [{'type': 'Document', 'url': form.video_url.data}]
elif type == 'poll':
request_json['object']['type'] = 'Question'
choices = [form.choice_1, form.choice_2, form.choice_3, form.choice_4, form.choice_5,
form.choice_6, form.choice_7, form.choice_8, form.choice_9, form.choice_10]
key = 'oneOf' if form.mode.data == 'single' else 'anyOf'
request_json['object'][key] = []
for choice in choices:
choice_data = choice.data.strip()
if choice_data:
request_json['object'][key].append({'name': choice_data})
request_json['object']['endTime'] = end_poll_date(form.finish_in.data)
# todo: add try..except
post = Post.new(current_user, community, request_json)
current_user.language_id = form.language_id.data
g.site.last_active = utcnow()
post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}" post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}"
db.session.commit() db.session.commit()
if post.image_id and post.image.file_path is None:
make_image_sizes(post.image_id, 170, 512, 'posts') # the 512 sized image is for masonry view
# Update list of cross posts
if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
Post.posted_at > post.posted_at - timedelta(days=6)).all()
for op in other_posts:
if op.cross_posts is None:
op.cross_posts = [post.id]
else:
op.cross_posts.append(post.id)
if post.cross_posts is None:
post.cross_posts = [op.id]
else:
post.cross_posts.append(op.id)
db.session.commit()
upvote_own_post(post) upvote_own_post(post)
notify_about_post(post)
if post_type == POST_TYPE_POLL: if post.type == POST_TYPE_POLL:
poll = Poll.query.filter_by(post_id=post.id).first() poll = Poll.query.filter_by(post_id=post.id).first()
if not poll.local_only: if not poll.local_only:
federate_post_to_user_followers(post) federate_post_to_user_followers(post)
@ -670,7 +722,7 @@ def add_post(actor, type):
federate_post(community, post) federate_post(community, post)
return redirect(f"/post/{post.id}") return redirect(f"/post/{post.id}")
else: else: # GET
form.communities.data = community.id form.communities.data = community.id
form.notify_author.data = True form.notify_author.data = True
if post_type == POST_TYPE_POLL: if post_type == POST_TYPE_POLL:
@ -1939,7 +1991,7 @@ def check_url_already_posted():
def upvote_own_post(post): def upvote_own_post(post):
post.score = 1 post.score = 1
post.up_votes = 1 post.up_votes = 1
post.ranking = post_ranking(post.score, utcnow()) post.ranking = post.post_ranking(post.score, utcnow())
vote = PostVote(user_id=current_user.id, post_id=post.id, author_id=current_user.id, effect=1) vote = PostVote(user_id=current_user.id, post_id=post.id, author_id=current_user.id, effect=1)
db.session.add(vote) db.session.add(vote)
db.session.commit() db.session.commit()

View file

@ -484,7 +484,7 @@ def end_poll_date(end_choice):
raise ValueError("Invalid choice") raise ValueError("Invalid choice")
def tags_from_string(tags: str) -> List[Tag]: def tags_from_string(tags: str) -> List[dict]:
return_value = [] return_value = []
tags = tags.strip() tags = tags.strip()
if tags == '': if tags == '':
@ -496,7 +496,7 @@ def tags_from_string(tags: str) -> List[Tag]:
tag = tag[1:] tag = tag[1:]
tag_to_append = find_hashtag_or_create(tag) tag_to_append = find_hashtag_or_create(tag)
if tag_to_append: if tag_to_append:
return_value.append(tag_to_append) return_value.append({'type': 'Hashtag', 'name': tag_to_append.name})
return return_value return return_value

View file

@ -1,6 +1,7 @@
import html
from datetime import datetime, timedelta, date, timezone from datetime import datetime, timedelta, date, timezone
from time import time from time import time
from typing import List, Union from typing import List, Union, Type
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
import arrow import arrow
@ -15,7 +16,7 @@ from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.ext.mutable import MutableList from sqlalchemy.ext.mutable import MutableList
from flask_sqlalchemy import BaseQuery from flask_sqlalchemy import BaseQuery
from sqlalchemy_searchable import SearchQueryMixin from sqlalchemy_searchable import SearchQueryMixin
from app import db, login, cache, celery, httpx_client from app import db, login, cache, celery, httpx_client, constants
import jwt import jwt
import os import os
import math import math
@ -1074,7 +1075,7 @@ class Post(db.Model):
url = db.Column(db.String(2048)) url = db.Column(db.String(2048))
body = db.Column(db.Text) body = db.Column(db.Text)
body_html = db.Column(db.Text) body_html = db.Column(db.Text)
type = db.Column(db.Integer) type = db.Column(db.Integer, default=constants.POST_TYPE_ARTICLE)
microblog = db.Column(db.Boolean, default=False) microblog = db.Column(db.Boolean, default=False)
comments_enabled = db.Column(db.Boolean, default=True) comments_enabled = db.Column(db.Boolean, default=True)
deleted = db.Column(db.Boolean, default=False, index=True) deleted = db.Column(db.Boolean, default=False, index=True)
@ -1127,6 +1128,243 @@ class Post(db.Model):
def get_by_ap_id(cls, ap_id): def get_by_ap_id(cls, ap_id):
return cls.query.filter_by(ap_id=ap_id).first() return cls.query.filter_by(ap_id=ap_id).first()
@classmethod
def new(cls, user: User, community: Community, request_json: dict, announce_id=None):
    """Build, persist and return a post from an ActivityPub-style ``Create`` payload.

    The same code path serves posts arriving over federation and posts made
    locally (the caller assembles an equivalent ``request_json``).

    :param user: author of the post.
    :param community: community the post is created in.
    :param request_json: dict with an ``'id'`` key and an ``'object'`` dict
        describing the post (``name``, ``content``, ``mediaType``,
        ``attachment``, ``tag``, ``language`` / ``contentMap``, ``type`` ...).
        NOTE: ``request_json['object']['content']`` may be rewritten in place
        (wrapped in ``<p>``) when it is treated as HTML.
    :param announce_id: stored as ``ap_announce_id`` on the post.
    :return: the committed post, or ``None`` when the object has neither a
        ``name`` nor ``content``, or when the title/body contains a blocked phrase.
    :raises Exception: when the linked domain is banned or ends with ``.pages.dev``.

    Side effects: commits ``db.session`` (several times), bumps
    ``community.post_count`` / ``user.post_count``, updates
    ``community.last_active``, may create ``File``, ``Notification``, ``Poll``
    and ``PollChoice`` rows, and calls ``make_image_sizes`` and
    ``notify_about_post``.
    """
    # Imported here rather than at module level - presumably to avoid circular imports (TODO confirm).
    from app.activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \
        make_image_sizes, notify_about_post
    from app.utils import allowlist_html, markdown_to_html, html_to_text, microblog_content_to_title, blocked_phrases, \
        is_image_url, is_video_url, domain_from_url, opengraph_parse, shorten_string, remove_tracking_from_link, \
        is_video_hosting_site, post_ranking, communities_banned_from

    ap_object = request_json['object']
    has_content = ap_object.get('content') is not None

    # Objects without a 'name' are microblog posts; their title is derived from the content further down.
    microblog = 'name' not in ap_object
    if microblog:
        if not has_content:
            return None
        title = "[Microblog]"
    else:
        title = ap_object['name'].strip()
    nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper()

    post = cls(user_id=user.id, community_id=community.id,
               title=html.unescape(title),
               comments_enabled=ap_object.get('commentsEnabled', True),
               sticky=ap_object.get('stickied', False),
               nsfw=ap_object.get('sensitive', False),
               nsfl=ap_object.get('nsfl', nsfl_in_title),
               ap_id=ap_object['id'],
               ap_create_id=request_json['id'],
               ap_announce_id=announce_id,
               up_votes=1,
               from_bot=user.bot,
               score=instance_weight(user.ap_domain),
               instance_id=user.instance_id,
               indexable=user.indexable,
               microblog=microblog,
               posted_at=utcnow()
               )

    if has_content:
        media_type = ap_object.get('mediaType')
        if media_type == 'text/html':
            post.body_html = allowlist_html(ap_object['content'])
            source = ap_object.get('source')
            # .get() so a 'source' dict lacking 'mediaType' falls back to the HTML instead of raising KeyError
            if isinstance(source, dict) and source.get('mediaType') == 'text/markdown':
                post.body = source['content']
                post.body_html = markdown_to_html(post.body)  # prefer Markdown if provided, overwrite version obtained from HTML
            else:
                post.body = html_to_text(post.body_html)
        elif media_type == 'text/markdown':
            post.body = ap_object['content']
            post.body_html = markdown_to_html(post.body)
        else:
            # No (or unknown) mediaType: treat the content as HTML, wrapping bare text in a paragraph
            if not ap_object['content'].startswith(('<p>', '<blockquote>')):
                ap_object['content'] = '<p>' + ap_object['content'] + '</p>'
            post.body_html = allowlist_html(ap_object['content'])
            post.body = html_to_text(post.body_html)

        if microblog:
            autogenerated_title = microblog_content_to_title(post.body_html)
            if len(autogenerated_title) < 20:
                title = '[Microblog] ' + autogenerated_title.strip()
            else:
                title = autogenerated_title.strip()
            if '[NSFL]' in title.upper() or '(NSFL)' in title.upper():
                post.nsfl = True
            if '[NSFW]' in title.upper() or '(NSFW)' in title.upper():
                post.nsfw = True
            post.title = title

    # Discard post if it contains certain phrases. Good for stopping spam floods.
    blocked_phrases_list = blocked_phrases()
    if any(blocked_phrase in post.title for blocked_phrase in blocked_phrases_list):
        return None
    if post.body and any(blocked_phrase in post.body for blocked_phrase in blocked_phrases_list):
        return None

    # Only the first attachment is considered.
    attachments = ap_object.get('attachment')
    if attachments and 'type' in attachments[0]:
        attachment = attachments[0]
        alt_text = None
        if attachment['type'] == 'Link':
            post.url = attachment['href']  # Lemmy < 0.19.4
        if attachment['type'] == 'Document':
            post.url = attachment['url']  # Mastodon
            alt_text = attachment.get('name')
        if attachment['type'] == 'Image':
            post.url = attachment['url']  # PixelFed, PieFed, Lemmy >= 0.19.4
            alt_text = attachment.get('name')

        if post.url:
            if is_image_url(post.url):
                post.type = constants.POST_TYPE_IMAGE
                if 'image' in ap_object and 'url' in ap_object['image']:
                    image = File(source_url=ap_object['image']['url'])
                else:
                    image = File(source_url=post.url)
                if alt_text:
                    image.alt_text = alt_text
                db.session.add(image)
                post.image = image
            elif is_video_url(post.url):  # youtube is detected later
                post.type = constants.POST_TYPE_VIDEO
                image = File(source_url=post.url)
                db.session.add(image)
                post.image = image
            else:
                post.type = constants.POST_TYPE_LINK

            domain = domain_from_url(post.url)
            # notify about links to banned websites.
            already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
            if domain.notify_mods:
                # Use the community passed in: `post` has not been added to the session yet, so
                # its `community` relationship cannot be relied on to be populated at this point.
                for community_member in community.moderators():
                    notify = Notification(title='Suspicious content', url=post.ap_id,
                                          user_id=community_member.user_id,
                                          author_id=user.id)
                    db.session.add(notify)
                    already_notified.add(community_member.user_id)
            if domain.notify_admins:
                for admin in Site.admins():
                    if admin.id not in already_notified:
                        notify = Notification(title='Suspicious content',
                                              url=post.ap_id, user_id=admin.id,
                                              author_id=user.id)
                        db.session.add(notify)
            if domain.banned or domain.name.endswith('.pages.dev'):
                # Notifications added above stay pending in the session; the caller decides what to commit.
                raise Exception(domain.name + ' is blocked by admin')
            domain.post_count += 1
            post.domain = domain

    if ap_object['type'] == 'Video':
        post.type = constants.POST_TYPE_VIDEO
        post.url = ap_object['id']
        icons = ap_object.get('icon')
        if isinstance(icons, list) and icons:  # the last icon in the list is used
            icon = File(source_url=icons[-1]['url'])
            db.session.add(icon)
            post.image = icon

    # Language. Lemmy uses 'language' while Mastodon has 'contentMap'
    language_json = ap_object.get('language')
    content_map = ap_object.get('contentMap')
    if isinstance(language_json, dict):
        language = find_language_or_create(language_json['identifier'], language_json['name'])
        post.language_id = language.id
    elif isinstance(content_map, dict) and content_map:  # an empty map would make next() raise StopIteration
        language = find_language(next(iter(content_map)))
        post.language_id = language.id if language else None

    if isinstance(ap_object.get('tag'), list):
        for json_tag in ap_object['tag']:
            # Skip malformed entries rather than letting a KeyError abort the whole post
            if not isinstance(json_tag, dict) or json_tag.get('type') != 'Hashtag' or not json_tag.get('name'):
                continue
            # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
            if json_tag['name'][1:].lower() != community.name.lower():
                hashtag = find_hashtag_or_create(json_tag['name'])
                if hashtag:
                    post.tags.append(hashtag)

    # Same 'url' presence check as the image-post branch above
    if post.image is None and 'image' in ap_object and 'url' in ap_object['image']:
        image = File(source_url=ap_object['image']['url'])
        db.session.add(image)
        post.image = image

    if post.image is None and post.type == constants.POST_TYPE_LINK:  # This is a link post but the source instance has not provided a thumbnail image
        # Let's see if we can do better than the source instance did!
        tn_url = post.url
        if tn_url.startswith('https://www.youtube.com/watch?v='):
            tn_url = 'https://youtu.be/' + tn_url[32:43]  # better chance of thumbnail from youtu.be than youtube.com
        opengraph = opengraph_parse(tn_url)
        if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
            filename = opengraph.get('og:image') or opengraph.get('og:image:url')
            if not filename.startswith('/'):
                file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
                post.image = file
                db.session.add(file)

    if 'searchableBy' in ap_object and ap_object['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public':
        post.indexable = False

    if post.url:
        post.url = remove_tracking_from_link(post.url)  # moved here as changes youtu.be to youtube.com
        if is_video_hosting_site(post.url):
            post.type = constants.POST_TYPE_VIDEO

    db.session.add(post)
    post.ranking = post.post_ranking(post.score, post.posted_at)
    community.post_count += 1
    community.last_active = utcnow()
    user.post_count += 1
    db.session.commit()

    # Polls need to be processed quite late because they need a post_id to refer to
    if ap_object['type'] == 'Question':
        post.type = constants.POST_TYPE_POLL
        mode = 'multiple' if 'anyOf' in ap_object else 'single'
        poll = Poll(post_id=post.id, end_poll=ap_object['endTime'], mode=mode, local_only=False)
        db.session.add(poll)
        choices = ap_object['oneOf' if mode == 'single' else 'anyOf']
        for sort_order, choice_ap in enumerate(choices, start=1):
            db.session.add(PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=sort_order))
        db.session.commit()

    if post.image_id:
        make_image_sizes(post.image_id, 170, 512, 'posts',
                         community.low_quality)  # the 512 sized image is for masonry view

    # Update list of cross posts: link this post with other recent, non-deleted posts of the same URL
    if post.url:
        other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
                                        Post.posted_at > post.posted_at - timedelta(days=6)).all()
        for op in other_posts:
            if op.cross_posts is None:
                op.cross_posts = [post.id]
            else:
                op.cross_posts.append(post.id)
            if post.cross_posts is None:
                post.cross_posts = [op.id]
            else:
                post.cross_posts.append(op.id)
        db.session.commit()

    if post.community_id not in communities_banned_from(user.id):
        notify_about_post(post)

    # Authors with reputation above 100 get one extra up-vote on their new post
    if user.reputation > 100:
        post.up_votes += 1
        post.score += 1
        post.ranking = post.post_ranking(post.score, post.posted_at)
        db.session.commit()

    return post
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
epoch = datetime(1970, 1, 1)

@classmethod
def epoch_seconds(cls, date):
    """Return the seconds elapsed between ``cls.epoch`` and ``date`` as a float.

    ``epoch`` is a naive ``datetime``, so ``date`` has to be naive as well for
    the subtraction to succeed. Microseconds are kept as the fractional part.
    """
    # A classmethod receives the class, hence ``cls`` rather than ``self``.
    elapsed = date - cls.epoch
    return elapsed.days * 86400 + elapsed.seconds + (float(elapsed.microseconds) / 1000000)
def delete_dependencies(self): def delete_dependencies(self):
db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete() db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete()
db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete() db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete()
@ -1420,7 +1658,7 @@ class PostReply(db.Model):
raise Exception('Gif comment ignored') raise Exception('Gif comment ignored')
if reply_is_stupid(reply.body): if reply_is_stupid(reply.body):
raise Exception('Stupid reply') raise Exception('Low quality reply')
db.session.add(reply) db.session.add(reply)
db.session.commit() db.session.commit()

View file

@ -28,7 +28,7 @@ from app.post import bp
from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \ from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
shorten_string, markdown_to_text, gibberish, ap_datetime, return_304, \ shorten_string, markdown_to_text, gibberish, ap_datetime, return_304, \
request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \ request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, moderating_communities, joined_communities, \ reply_already_exists, reply_is_just_link_to_gif_reaction, moderating_communities, joined_communities, \
blocked_instances, blocked_domains, community_moderators, blocked_phrases, show_ban_message, recently_upvoted_posts, \ blocked_instances, blocked_domains, community_moderators, blocked_phrases, show_ban_message, recently_upvoted_posts, \
recently_downvoted_posts, recently_upvoted_post_replies, recently_downvoted_post_replies, reply_is_stupid, \ recently_downvoted_posts, recently_upvoted_post_replies, recently_downvoted_post_replies, reply_is_stupid, \
languages_for_form, menu_topics, add_to_modlog, blocked_communities, piefed_markdown_to_lemmy_markdown, \ languages_for_form, menu_topics, add_to_modlog, blocked_communities, piefed_markdown_to_lemmy_markdown, \
@ -546,11 +546,18 @@ def add_reply(post_id: int, comment_id: int):
current_user.ip_address = ip_address() current_user.ip_address = ip_address()
current_user.language_id = form.language_id.data current_user.language_id = form.language_id.data
try:
reply = PostReply.new(current_user, post, in_reply_to, reply = PostReply.new(current_user, post, in_reply_to,
body=piefed_markdown_to_lemmy_markdown(form.body.data), body=piefed_markdown_to_lemmy_markdown(form.body.data),
body_html=markdown_to_html(form.body.data), body_html=markdown_to_html(form.body.data),
notify_author=form.notify_author.data, notify_author=form.notify_author.data,
language_id=form.language_id.data) language_id=form.language_id.data)
except Exception as ex:
flash(_('Your reply was not accepted because %(reason)s', reason=str(ex)), 'error')
if in_reply_to.depth <= constants.THREAD_CUTOFF_DEPTH:
return redirect(url_for('activitypub.post_ap', post_id=post_id, _anchor=f'comment_{in_reply_to.id}'))
else:
return redirect(url_for('post.continue_discussion', post_id=post_id, comment_id=in_reply_to.parent_id))
form.body.data = '' form.body.data = ''
flash('Your comment has been added.') flash('Your comment has been added.')

View file

@ -898,51 +898,8 @@ def topic_tree() -> List:
return [topic for topic in topics_dict.values() if topic['topic'].parent_id is None] return [topic for topic in topics_dict.values() if topic['topic'].parent_id is None]
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
epoch = datetime(1970, 1, 1)


def epoch_seconds(date):
    """Return how many seconds separate ``date`` from the module-level ``epoch``.

    The result is a float; the microsecond part of the difference becomes the
    fractional seconds.
    """
    delta = date - epoch
    whole_seconds = delta.days * 86400 + delta.seconds
    fraction = float(delta.microseconds) / 1000000
    return whole_seconds + fraction
# Ranking value for a post, computed from its score and its date.
# NOTE(review): this span appears to come from a side-by-side diff rendering. The `def` line is
# repeated, and the first body line seems to fuse the removed `if date is None:` with the added
# `return Post.post_ranking(score, date)`. The remaining lines look like the removed
# implementation; confirm against the real file before relying on them.
def post_ranking(score, date: datetime): def post_ranking(score, date: datetime):
if date is None: return Post.post_ranking(score, date)
date = datetime.utcnow()
# A missing score is treated as 1.
if score is None:
score = 1
# Base-10 log of the score's magnitude; the max(..., 1) floor keeps the log at 0 or above.
order = math.log(max(abs(score), 1), 10)
# Sign of the score: 1, -1 or 0.
sign = 1 if score > 0 else -1 if score < 0 else 0
# Seconds since 1970-01-01 (via epoch_seconds), shifted by the constant 1685766018.
seconds = epoch_seconds(date) - 1685766018
# Signed log term plus the time term (seconds / 45000), rounded to 7 decimal places.
return round(sign * order + seconds / 45000, 7)
# used for ranking comments
def _confidence(ups, downs):
n = ups + downs
if n == 0:
return 0.0
z = 1.281551565545
p = float(ups) / n
left = p + 1 / (2 * n) * z * z
right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
under = 1 + 1 / n * z * z
return (left - right) / under
def confidence(ups, downs) -> float:
    """Return the comment-ranking confidence for the given vote counts.

    ``None`` or negative counts are treated as 0. With no votes the result is
    0.0; otherwise the value comes from ``_confidence``.
    """
    ups = 0 if ups is None or ups < 0 else ups
    downs = 0 if downs is None or downs < 0 else downs
    if ups + downs == 0:
        return 0.0
    return _confidence(ups, downs)
def opengraph_parse(url): def opengraph_parse(url):