From cec844ec4f053d4d1bc9cf7d1368b566f49c6c4a Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:37:00 +1300 Subject: [PATCH 1/4] refactor post-new wip --- app/activitypub/util.py | 230 ++---------------------------------- app/community/routes.py | 42 +++---- app/models.py | 256 +++++++++++++++++++++++++++++++++++++++- app/post/routes.py | 19 ++- app/utils.py | 45 +------ 5 files changed, 294 insertions(+), 298 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 33e71395..422bbba2 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -25,12 +25,12 @@ from io import BytesIO import pytesseract from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \ - is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ - shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \ - blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \ + is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \ + shorten_string, remove_tracking_from_link, \ + microblog_content_to_title, generate_image_from_video_url, is_video_url, \ notification_subscribers, communities_banned_from, actor_contains_blocked_words, \ - html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \ - moderating_communities, is_video_hosting_site + html_to_text, add_to_modlog_activitypub, joined_communities, \ + moderating_communities from sqlalchemy import or_ @@ -1620,222 +1620,12 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json activity_log.exception_message = 'Community is local only, post discarded' activity_log.result = 'ignored' return None - microblog = False - if 'name' not in request_json['object']: # Microblog posts - if 'content' in request_json['object'] and request_json['object']['content'] is not None: - title = "[Microblog]" - microblog = True - else: - return None - else: - title = request_json['object']['name'].strip() + try: + post = Post.new(user, community, request_json, announce_id) + except Exception as ex: + activity_log.exception_message = str(ex) + return None - nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper() - post = Post(user_id=user.id, community_id=community.id, - title=html.unescape(title), - comments_enabled=request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True, - sticky=request_json['object']['stickied'] if 'stickied' in request_json['object'] else False, - nsfw=request_json['object']['sensitive'] if 'sensitive' in request_json['object'] else False, - nsfl=request_json['object']['nsfl'] if 'nsfl' in request_json['object'] else nsfl_in_title, - ap_id=request_json['object']['id'], - ap_create_id=request_json['id'], - ap_announce_id=announce_id, - type=constants.POST_TYPE_ARTICLE, - up_votes=1, - from_bot=user.bot, - score=instance_weight(user.ap_domain), - instance_id=user.instance_id, - indexable=user.indexable, - microblog=microblog - ) - if 'content' in request_json['object'] and request_json['object']['content'] is not None: - if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html': - post.body_html = allowlist_html(request_json['object']['content']) - if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown': - post.body = request_json['object']['source']['content'] - post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML - else: - post.body = html_to_text(post.body_html) - elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown': - post.body = request_json['object']['content'] - post.body_html = markdown_to_html(post.body) - else: - if not (request_json['object']['content'].startswith('

') or request_json['object']['content'].startswith('

')): - request_json['object']['content'] = '

' + request_json['object']['content'] + '

' - post.body_html = allowlist_html(request_json['object']['content']) - post.body = html_to_text(post.body_html) - if microblog: - autogenerated_title = microblog_content_to_title(post.body_html) - if len(autogenerated_title) < 20: - title = '[Microblog] ' + autogenerated_title.strip() - else: - title = autogenerated_title.strip() - if '[NSFL]' in title.upper() or '(NSFL)' in title.upper(): - post.nsfl = True - if '[NSFW]' in title.upper() or '(NSFW)' in title.upper(): - post.nsfw = True - post.title = title - # Discard post if it contains certain phrases. Good for stopping spam floods. - blocked_phrases_list = blocked_phrases() - for blocked_phrase in blocked_phrases_list: - if blocked_phrase in post.title: - return None - if post.body: - for blocked_phrase in blocked_phrases_list: - if blocked_phrase in post.body: - return None - if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \ - 'type' in request_json['object']['attachment'][0]: - alt_text = None - if request_json['object']['attachment'][0]['type'] == 'Link': - post.url = request_json['object']['attachment'][0]['href'] # Lemmy < 0.19.4 - if request_json['object']['attachment'][0]['type'] == 'Document': - post.url = request_json['object']['attachment'][0]['url'] # Mastodon - if 'name' in request_json['object']['attachment'][0]: - alt_text = request_json['object']['attachment'][0]['name'] - if request_json['object']['attachment'][0]['type'] == 'Image': - post.url = request_json['object']['attachment'][0]['url'] # PixelFed, PieFed, Lemmy >= 0.19.4 - if 'name' in request_json['object']['attachment'][0]: - alt_text = request_json['object']['attachment'][0]['name'] - if post.url: - if is_image_url(post.url): - post.type = POST_TYPE_IMAGE - if 'image' in request_json['object'] and 'url' in request_json['object']['image']: - image = File(source_url=request_json['object']['image']['url']) - else: - image = File(source_url=post.url) - if alt_text: - image.alt_text = alt_text - db.session.add(image) - post.image = image - elif is_video_url(post.url): # youtube is detected later - post.type = POST_TYPE_VIDEO - image = File(source_url=post.url) - db.session.add(image) - post.image = image - else: - post.type = POST_TYPE_LINK - domain = domain_from_url(post.url) - # notify about links to banned websites. - already_notified = set() # often admins and mods are the same people - avoid notifying them twice - if domain.notify_mods: - for community_member in post.community.moderators(): - notify = Notification(title='Suspicious content', url=post.ap_id, - user_id=community_member.user_id, - author_id=user.id) - db.session.add(notify) - already_notified.add(community_member.user_id) - if domain.notify_admins: - for admin in Site.admins(): - if admin.id not in already_notified: - notify = Notification(title='Suspicious content', - url=post.ap_id, user_id=admin.id, - author_id=user.id) - db.session.add(notify) - if domain.banned or domain.name.endswith('.pages.dev'): - post = None - activity_log.exception_message = domain.name + ' is blocked by admin' - else: - domain.post_count += 1 - post.domain = domain - - if post is not None: - if request_json['object']['type'] == 'Video': - post.type = POST_TYPE_VIDEO - post.url = request_json['object']['id'] - if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list): - icon = File(source_url=request_json['object']['icon'][-1]['url']) - db.session.add(icon) - post.image = icon - - # Language. Lemmy uses 'language' while Mastodon has 'contentMap' - if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict): - language = find_language_or_create(request_json['object']['language']['identifier'], - request_json['object']['language']['name']) - post.language_id = language.id - elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict): - language = find_language(next(iter(request_json['object']['contentMap']))) - post.language_id = language.id if language else None - if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list): - for json_tag in request_json['object']['tag']: - if json_tag and json_tag['type'] == 'Hashtag': - if json_tag['name'][1:].lower() != community.name.lower(): # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore - hashtag = find_hashtag_or_create(json_tag['name']) - if hashtag: - post.tags.append(hashtag) - if 'image' in request_json['object'] and post.image is None: - image = File(source_url=request_json['object']['image']['url']) - db.session.add(image) - post.image = image - if post.image is None and post.type == POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image - # Let's see if we can do better than the source instance did! - tn_url = post.url - if tn_url[:32] == 'https://www.youtube.com/watch?v=': - tn_url = 'https://youtu.be/' + tn_url[32:43] # better chance of thumbnail from youtu.be than youtube.com - opengraph = opengraph_parse(tn_url) - if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): - filename = opengraph.get('og:image') or opengraph.get('og:image:url') - if not filename.startswith('/'): - file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295)) - post.image = file - db.session.add(file) - - if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public': - post.indexable = False - - if post.url: - post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com - if is_video_hosting_site(post.url): - post.type = POST_TYPE_VIDEO - db.session.add(post) - post.ranking = post_ranking(post.score, post.posted_at) - community.post_count += 1 - community.last_active = utcnow() - activity_log.result = 'success' - user.post_count += 1 - db.session.commit() - - # Polls need to be processed quite late because they need a post_id to refer to - if request_json['object']['type'] == 'Question': - post.type = POST_TYPE_POLL - mode = 'single' - if 'anyOf' in request_json['object']: - mode = 'multiple' - poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False) - db.session.add(poll) - i = 1 - for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']: - new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i) - db.session.add(new_choice) - i += 1 - db.session.commit() - - if post.image_id: - make_image_sizes(post.image_id, 170, 512, 'posts', community.low_quality) # the 512 sized image is for masonry view - - # Update list of cross posts - if post.url: - other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False, - Post.posted_at > post.posted_at - timedelta(days=6)).all() - for op in other_posts: - if op.cross_posts is None: - op.cross_posts = [post.id] - else: - op.cross_posts.append(post.id) - if post.cross_posts is None: - post.cross_posts = [op.id] - else: - post.cross_posts.append(op.id) - db.session.commit() - - if post.community_id not in communities_banned_from(user.id): - notify_about_post(post) - - if user.reputation > 100: - post.up_votes += 1 - post.score += 1 - post.ranking = post_ranking(post.score, post.posted_at) - db.session.commit() return post diff --git a/app/community/routes.py b/app/community/routes.py index ba80efe8..89857667 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -623,37 +623,31 @@ def add_post(actor, type): community = Community.query.get_or_404(form.communities.data) if not can_create_post(current_user, community): abort(401) - post = Post(user_id=current_user.id, community_id=form.communities.data, instance_id=1) - save_post(form, post, post_type) + + request_json = { + 'id': None, + 'object': { + 'name': form.title.data, + 'sticky': form.sticky.data, + 'nsfw': form.nsfw.data, + 'nsfl': form.nsfl.data, + 'id': gibberish(), # this will be updated once we have the post.id + 'mediaType': 'text/markdown', + 'content': form.body.data, + } + } + # todo: add try..except + post = Post.new(current_user, community, request_json) + community.post_count += 1 current_user.post_count += 1 community.last_active = g.site.last_active = utcnow() - db.session.commit() post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}" db.session.commit() - if post.image_id and post.image.file_path is None: - make_image_sizes(post.image_id, 170, 512, 'posts') # the 512 sized image is for masonry view - - # Update list of cross posts - if post.url: - other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False, - Post.posted_at > post.posted_at - timedelta(days=6)).all() - for op in other_posts: - if op.cross_posts is None: - op.cross_posts = [post.id] - else: - op.cross_posts.append(post.id) - if post.cross_posts is None: - post.cross_posts = [op.id] - else: - post.cross_posts.append(op.id) - db.session.commit() - upvote_own_post(post) - notify_about_post(post) - if post_type == POST_TYPE_POLL: + if post.type == POST_TYPE_POLL: poll = Poll.query.filter_by(post_id=post.id).first() if not poll.local_only: federate_post_to_user_followers(post) @@ -665,7 +659,7 @@ def add_post(actor, type): federate_post(community, post) return redirect(f"/post/{post.id}") - else: + else: # GET form.communities.data = community.id form.notify_author.data = True if post_type == POST_TYPE_POLL: diff --git a/app/models.py b/app/models.py index 0e649eb0..88de3df8 100644 --- a/app/models.py +++ b/app/models.py @@ -1,6 +1,7 @@ +import html from datetime import datetime, timedelta, date, timezone from time import time -from typing import List, Union +from typing import List, Union, Type from urllib.parse import urlparse, parse_qs, urlencode, urlunparse import arrow @@ -15,7 +16,7 @@ from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.ext.mutable import MutableList from flask_sqlalchemy import BaseQuery from sqlalchemy_searchable import SearchQueryMixin -from app import db, login, cache, celery, httpx_client +from app import db, login, cache, celery, httpx_client, constants import jwt import os import math @@ -1074,7 +1075,7 @@ class Post(db.Model): url = db.Column(db.String(2048)) body = db.Column(db.Text) body_html = db.Column(db.Text) - type = db.Column(db.Integer) + type = db.Column(db.Integer, default=constants.POST_TYPE_ARTICLE) microblog = db.Column(db.Boolean, default=False) comments_enabled = db.Column(db.Boolean, default=True) deleted = db.Column(db.Boolean, default=False, index=True) @@ -1127,6 +1128,253 @@ class Post(db.Model): def get_by_ap_id(cls, ap_id): return cls.query.filter_by(ap_id=ap_id).first() + @classmethod + def new(cls, user: User, community: Community, request_json: dict, announce_id=None): + from activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \ + make_image_sizes, notify_about_post + from app.utils import allowlist_html, markdown_to_html, html_to_text, microblog_content_to_title, blocked_phrases, \ + is_image_url, is_video_url, domain_from_url, opengraph_parse, shorten_string, remove_tracking_from_link, \ + is_video_hosting_site, post_ranking, communities_banned_from + + microblog = False + if 'name' not in request_json['object']: # Microblog posts + if 'content' in request_json['object'] and request_json['object']['content'] is not None: + title = "[Microblog]" + microblog = True + else: + return None + else: + title = request_json['object']['name'].strip() + nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper() + post = Post(user_id=user.id, community_id=community.id, + title=html.unescape(title), + comments_enabled=request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True, + sticky=request_json['object']['stickied'] if 'stickied' in request_json['object'] else False, + nsfw=request_json['object']['sensitive'] if 'sensitive' in request_json['object'] else False, + nsfl=request_json['object']['nsfl'] if 'nsfl' in request_json['object'] else nsfl_in_title, + ap_id=request_json['object']['id'], + ap_create_id=request_json['id'], + ap_announce_id=announce_id, + up_votes=1, + from_bot=user.bot, + score=instance_weight(user.ap_domain), + instance_id=user.instance_id, + indexable=user.indexable, + microblog=microblog + ) + + if 'content' in request_json['object'] and request_json['object']['content'] is not None: + if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html': + post.body_html = allowlist_html(request_json['object']['content']) + if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and \ + request_json['object']['source']['mediaType'] == 'text/markdown': + post.body = request_json['object']['source']['content'] + post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML + else: + post.body = html_to_text(post.body_html) + elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown': + post.body = request_json['object']['content'] + post.body_html = markdown_to_html(post.body) + else: + if not (request_json['object']['content'].startswith('

') or request_json['object']['content'].startswith('

')): + request_json['object']['content'] = '

' + request_json['object']['content'] + '

' + post.body_html = allowlist_html(request_json['object']['content']) + post.body = html_to_text(post.body_html) + if microblog: + autogenerated_title = microblog_content_to_title(post.body_html) + if len(autogenerated_title) < 20: + title = '[Microblog] ' + autogenerated_title.strip() + else: + title = autogenerated_title.strip() + if '[NSFL]' in title.upper() or '(NSFL)' in title.upper(): + post.nsfl = True + if '[NSFW]' in title.upper() or '(NSFW)' in title.upper(): + post.nsfw = True + post.title = title + # Discard post if it contains certain phrases. Good for stopping spam floods. + blocked_phrases_list = blocked_phrases() + for blocked_phrase in blocked_phrases_list: + if blocked_phrase in post.title: + return None + if post.body: + for blocked_phrase in blocked_phrases_list: + if blocked_phrase in post.body: + return None + + if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \ + 'type' in request_json['object']['attachment'][0]: + alt_text = None + if request_json['object']['attachment'][0]['type'] == 'Link': + post.url = request_json['object']['attachment'][0]['href'] # Lemmy < 0.19.4 + if request_json['object']['attachment'][0]['type'] == 'Document': + post.url = request_json['object']['attachment'][0]['url'] # Mastodon + if 'name' in request_json['object']['attachment'][0]: + alt_text = request_json['object']['attachment'][0]['name'] + if request_json['object']['attachment'][0]['type'] == 'Image': + post.url = request_json['object']['attachment'][0]['url'] # PixelFed, PieFed, Lemmy >= 0.19.4 + if 'name' in request_json['object']['attachment'][0]: + alt_text = request_json['object']['attachment'][0]['name'] + if post.url: + if is_image_url(post.url): + post.type = constants.POST_TYPE_IMAGE + if 'image' in request_json['object'] and 'url' in request_json['object']['image']: + image = File(source_url=request_json['object']['image']['url']) + else: + image = File(source_url=post.url) + if alt_text: + image.alt_text = alt_text + db.session.add(image) + post.image = image + elif is_video_url(post.url): # youtube is detected later + post.type = constants.POST_TYPE_VIDEO + image = File(source_url=post.url) + db.session.add(image) + post.image = image + else: + post.type = constants.POST_TYPE_LINK + domain = domain_from_url(post.url) + # notify about links to banned websites. + already_notified = set() # often admins and mods are the same people - avoid notifying them twice + if domain.notify_mods: + for community_member in post.community.moderators(): + notify = Notification(title='Suspicious content', url=post.ap_id, + user_id=community_member.user_id, + author_id=user.id) + db.session.add(notify) + already_notified.add(community_member.user_id) + if domain.notify_admins: + for admin in Site.admins(): + if admin.id not in already_notified: + notify = Notification(title='Suspicious content', + url=post.ap_id, user_id=admin.id, + author_id=user.id) + db.session.add(notify) + if domain.banned or domain.name.endswith('.pages.dev'): + raise Exception(domain.name + ' is blocked by admin') + else: + domain.post_count += 1 + post.domain = domain + + if post is not None: + if request_json['object']['type'] == 'Video': + post.type = constants.POST_TYPE_VIDEO + post.url = request_json['object']['id'] + if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list): + icon = File(source_url=request_json['object']['icon'][-1]['url']) + db.session.add(icon) + post.image = icon + + # Language. Lemmy uses 'language' while Mastodon has 'contentMap' + if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict): + language = find_language_or_create(request_json['object']['language']['identifier'], + request_json['object']['language']['name']) + post.language_id = language.id + elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict): + language = find_language(next(iter(request_json['object']['contentMap']))) + post.language_id = language.id if language else None + if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list): + for json_tag in request_json['object']['tag']: + if json_tag and json_tag['type'] == 'Hashtag': + if json_tag['name'][1:].lower() != community.name.lower(): # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore + hashtag = find_hashtag_or_create(json_tag['name']) + if hashtag: + post.tags.append(hashtag) + if 'image' in request_json['object'] and post.image is None: + image = File(source_url=request_json['object']['image']['url']) + db.session.add(image) + post.image = image + if post.image is None and post.type == constants.POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image + # Let's see if we can do better than the source instance did! + tn_url = post.url + if tn_url[:32] == 'https://www.youtube.com/watch?v=': + tn_url = 'https://youtu.be/' + tn_url[ + 32:43] # better chance of thumbnail from youtu.be than youtube.com + opengraph = opengraph_parse(tn_url) + if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): + filename = opengraph.get('og:image') or opengraph.get('og:image:url') + if not filename.startswith('/'): + file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295)) + post.image = file + db.session.add(file) + + if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public': + post.indexable = False + + if post.url: + post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com + if is_video_hosting_site(post.url): + post.type = constants.POST_TYPE_VIDEO + db.session.add(post) + post.ranking = post_ranking(post.score, post.posted_at) + community.post_count += 1 + community.last_active = utcnow() + user.post_count += 1 + db.session.commit() + + # Polls need to be processed quite late because they need a post_id to refer to + if request_json['object']['type'] == 'Question': + post.type = constants.POST_TYPE_POLL + mode = 'single' + if 'anyOf' in request_json['object']: + mode = 'multiple' + poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False) + db.session.add(poll) + i = 1 + for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']: + new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i) + db.session.add(new_choice) + i += 1 + db.session.commit() + + if post.image_id: + make_image_sizes(post.image_id, 170, 512, 'posts', + community.low_quality) # the 512 sized image is for masonry view + + # Update list of cross posts + if post.url: + other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False, + Post.posted_at > post.posted_at - timedelta(days=6)).all() + for op in other_posts: + if op.cross_posts is None: + op.cross_posts = [post.id] + else: + op.cross_posts.append(post.id) + if post.cross_posts is None: + post.cross_posts = [op.id] + else: + post.cross_posts.append(op.id) + db.session.commit() + + if post.community_id not in communities_banned_from(user.id): + notify_about_post(post) + + if user.reputation > 100: + post.up_votes += 1 + post.score += 1 + post.ranking = Post.post_ranking(post.score, post.posted_at) + db.session.commit() + + return post + + # All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9 + epoch = datetime(1970, 1, 1) + + @classmethod + def epoch_seconds(self, date): + td = date - self.epoch + return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) + + @classmethod + def post_ranking(cls, score, date: datetime): + if date is None: + date = datetime.utcnow() + if score is None: + score = 1 + order = math.log(max(abs(score), 1), 10) + sign = 1 if score > 0 else -1 if score < 0 else 0 + seconds = Post.epoch_seconds(date) - 1685766018 + return round(sign * order + seconds / 45000, 7) + def delete_dependencies(self): db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete() db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete() @@ -1420,7 +1668,7 @@ class PostReply(db.Model): raise Exception('Gif comment ignored') if reply_is_stupid(reply.body): - raise Exception('Stupid reply') + raise Exception('Low quality reply') db.session.add(reply) db.session.commit() diff --git a/app/post/routes.py b/app/post/routes.py index 47f5fc77..f58d2b6e 100644 --- a/app/post/routes.py +++ b/app/post/routes.py @@ -28,7 +28,7 @@ from app.post import bp from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \ shorten_string, markdown_to_text, gibberish, ap_datetime, return_304, \ request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \ - reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, moderating_communities, joined_communities, \ + reply_already_exists, reply_is_just_link_to_gif_reaction, moderating_communities, joined_communities, \ blocked_instances, blocked_domains, community_moderators, blocked_phrases, show_ban_message, recently_upvoted_posts, \ recently_downvoted_posts, recently_upvoted_post_replies, recently_downvoted_post_replies, reply_is_stupid, \ languages_for_form, menu_topics, add_to_modlog, blocked_communities, piefed_markdown_to_lemmy_markdown, \ @@ -546,11 +546,18 @@ def add_reply(post_id: int, comment_id: int): current_user.ip_address = ip_address() current_user.language_id = form.language_id.data - reply = PostReply.new(current_user, post, in_reply_to, - body=piefed_markdown_to_lemmy_markdown(form.body.data), - body_html=markdown_to_html(form.body.data), - notify_author=form.notify_author.data, - language_id=form.language_id.data) + try: + reply = PostReply.new(current_user, post, in_reply_to, + body=piefed_markdown_to_lemmy_markdown(form.body.data), + body_html=markdown_to_html(form.body.data), + notify_author=form.notify_author.data, + language_id=form.language_id.data) + except Exception as ex: + flash(_('Your reply was not accepted because %(reason)s', reason=str(ex)), 'error') + if in_reply_to.depth <= constants.THREAD_CUTOFF_DEPTH: + return redirect(url_for('activitypub.post_ap', post_id=post_id, _anchor=f'comment_{in_reply_to.id}')) + else: + return redirect(url_for('post.continue_discussion', post_id=post_id, comment_id=in_reply_to.parent_id)) form.body.data = '' flash('Your comment has been added.') diff --git a/app/utils.py b/app/utils.py index 29fc7c7c..d24357d2 100644 --- a/app/utils.py +++ b/app/utils.py @@ -898,51 +898,8 @@ def topic_tree() -> List: return [topic for topic in topics_dict.values() if topic['topic'].parent_id is None] -# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9 -epoch = datetime(1970, 1, 1) - -def epoch_seconds(date): - td = date - epoch - return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) - - def post_ranking(score, date: datetime): - if date is None: - date = datetime.utcnow() - if score is None: - score = 1 - order = math.log(max(abs(score), 1), 10) - sign = 1 if score > 0 else -1 if score < 0 else 0 - seconds = epoch_seconds(date) - 1685766018 - return round(sign * order + seconds / 45000, 7) - - -# used for ranking comments -def _confidence(ups, downs): - n = ups + downs - - if n == 0: - return 0.0 - - z = 1.281551565545 - p = float(ups) / n - - left = p + 1 / (2 * n) * z * z - right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n)) - under = 1 + 1 / n * z * z - - return (left - right) / under - - -def confidence(ups, downs) -> float: - if ups is None or ups < 0: - ups = 0 - if downs is None or downs < 0: - downs = 0 - if ups + downs == 0: - return 0.0 - else: - return _confidence(ups, downs) + return Post.post_ranking(score, date) def opengraph_parse(url): From feca5992afd6fb0a88c0b6c8d0ac6b3ea06b36b6 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Wed, 16 Oct 2024 21:42:30 +1300 Subject: [PATCH 2/4] refactor post-new wip --- app/community/routes.py | 24 +++++++++++++++++++++++- app/community/util.py | 4 ++-- app/models.py | 6 +++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/app/community/routes.py b/app/community/routes.py index 89857667..0e4811e3 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -21,7 +21,7 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre EditCommunityWikiPageForm from app.community.util import search_for_community, actor_to_community, \ save_post, save_icon_file, save_banner_file, send_to_remote_instance, \ - delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users + delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \ SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \ REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS @@ -624,6 +624,8 @@ def add_post(actor, type): if not can_create_post(current_user, community): abort(401) + language = Language.query.get(form.language_id.data) + request_json = { 'id': None, 'object': { @@ -634,13 +636,33 @@ def add_post(actor, type): 'id': gibberish(), # this will be updated once we have the post.id 'mediaType': 'text/markdown', 'content': form.body.data, + 'tag': tags_from_string(form.tags.data), + 'language': {'identifier': language.code, 'name': language.name} } } + if type == 'link': + request_json['object']['attachment'] = {'type': 'Link', 'href': form.link_url.data} + elif type == 'image': + request_json['object']['attachment'] = {'type': 'Image', 'url': image_url, 'name': form.image_alt_text} + elif type == 'video': + request_json['object']['attachment'] = {'type': 'Document', 'url': form.video_url.data} + elif type == 'poll': + request_json['object']['type'] = 'Question' + choices = [form.choice_1, form.choice_2, form.choice_3, form.choice_4, form.choice_5, + form.choice_6, form.choice_7, form.choice_8, form.choice_9, form.choice_10] + key = 'oneOf' if form.mode.data == 'single' else 'anyOf' + request_json['object'][key] = [] + for choice in choices: + choice_data = choice.data.strip() + if choice_data: + request_json['object'][key].append({'name': choice_data}) + # todo: add try..except post = Post.new(current_user, community, request_json) community.post_count += 1 current_user.post_count += 1 + current_user.language_id = form.language_id.data community.last_active = g.site.last_active = utcnow() post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}" db.session.commit() diff --git a/app/community/util.py b/app/community/util.py index b865d48c..59376fd7 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -484,7 +484,7 @@ def end_poll_date(end_choice): raise ValueError("Invalid choice") -def tags_from_string(tags: str) -> List[Tag]: +def tags_from_string(tags: str) -> List[dict]: return_value = [] tags = tags.strip() if tags == '': @@ -496,7 +496,7 @@ def tags_from_string(tags: str) -> List[Tag]: tag = tag[1:] tag_to_append = find_hashtag_or_create(tag) if tag_to_append: - return_value.append(tag_to_append) + return_value.append({'type': 'Hashtag', 'name': tag_to_append.name}) return return_value diff --git a/app/models.py b/app/models.py index 88de3df8..8587e190 100644 --- a/app/models.py +++ b/app/models.py @@ -635,7 +635,7 @@ class User(UserMixin, db.Model): verification_token = db.Column(db.String(16), index=True) banned = db.Column(db.Boolean, default=False) deleted = db.Column(db.Boolean, default=False) - deleted_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True) + deleted_by = db.Column(db.Integer, index=True) about = db.Column(db.Text) # markdown about_html = db.Column(db.Text) # html keywords = db.Column(db.String(256)) @@ -1079,7 +1079,7 @@ class Post(db.Model): microblog = db.Column(db.Boolean, default=False) comments_enabled = db.Column(db.Boolean, default=True) deleted = db.Column(db.Boolean, default=False, index=True) - deleted_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True) + deleted_by = db.Column(db.Integer, index=True) mea_culpa = db.Column(db.Boolean, default=False) has_embed = db.Column(db.Boolean, default=False) reply_count = db.Column(db.Integer, default=0) @@ -1602,7 +1602,7 @@ class PostReply(db.Model): created_at = db.Column(db.DateTime, index=True, default=utcnow) posted_at = db.Column(db.DateTime, index=True, default=utcnow) deleted = db.Column(db.Boolean, default=False, index=True) - deleted_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True) + deleted_by = db.Column(db.Integer, index=True) ip = db.Column(db.String(50)) from_bot = db.Column(db.Boolean, default=False) up_votes = db.Column(db.Integer, default=0) From 518f165c1f2363b7c9b8c9d41ef7339dd1e4c0db Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Sun, 20 Oct 2024 20:21:30 +1300 Subject: [PATCH 3/4] post-new: image posts --- app/community/routes.py | 52 +++++++++++++++++++++++++++++++++++------ app/models.py | 20 ++++------------ 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/app/community/routes.py b/app/community/routes.py index 0e4811e3..b037e408 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -1,12 +1,16 @@ +import base64 +import os from collections import namedtuple from io import BytesIO from random import randint import flask +from PIL import Image, ImageOps from flask import redirect, url_for, flash, request, make_response, session, Markup, current_app, abort, g, json, \ jsonify from flask_login import current_user, login_required from flask_babel import _ +from pillow_heif import register_heif_opener from slugify import slugify from sqlalchemy import or_, desc, text @@ -21,7 +25,8 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre EditCommunityWikiPageForm from app.community.util import search_for_community, actor_to_community, \ save_post, save_icon_file, save_banner_file, send_to_remote_instance, \ - delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string + delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string, \ + allowed_extensions, end_poll_date from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \ SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \ REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS @@ -38,7 +43,7 @@ from app.utils import get_setting, render_template, allowlist_html, markdown_to_ joined_communities, moderating_communities, blocked_domains, mimetype_from_url, blocked_instances, \ community_moderators, communities_banned_from, show_ban_message, recently_upvoted_posts, recently_downvoted_posts, \ blocked_users, post_ranking, languages_for_form, english_language_id, menu_topics, add_to_modlog, \ - blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown + blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown, ensure_directory_exists from feedgen.feed import FeedGenerator from datetime import timezone, timedelta from copy import copy @@ -581,8 +586,8 @@ def add_post(actor, type): return show_ban_message() community = actor_to_community(actor) + post_type = POST_TYPE_ARTICLE if type == 'discussion': - post_type = POST_TYPE_ARTICLE form = CreateDiscussionForm() elif type == 'link': post_type = POST_TYPE_LINK @@ -630,6 +635,7 @@ def add_post(actor, type): 'id': None, 'object': { 'name': form.title.data, + 'type': 'Page', 'sticky': form.sticky.data, 'nsfw': form.nsfw.data, 'nsfl': form.nsfl.data, @@ -641,11 +647,42 @@ def add_post(actor, type): } } if type == 'link': - request_json['object']['attachment'] = {'type': 'Link', 'href': form.link_url.data} + request_json['object']['attachment'] = [{'type': 'Link', 'href': form.link_url.data}] elif type == 'image': - request_json['object']['attachment'] = {'type': 'Image', 'url': image_url, 'name': form.image_alt_text} + uploaded_file = request.files['image_file'] + if uploaded_file and uploaded_file.filename != '': + # check if this is an allowed type of file + file_ext = os.path.splitext(uploaded_file.filename)[1] + if file_ext.lower() not in allowed_extensions: + abort(400, description="Invalid image type.") + + new_filename = gibberish(15) + # set up the storage directory + directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4] + ensure_directory_exists(directory) + + final_place = os.path.join(directory, new_filename + file_ext) + uploaded_file.seek(0) + uploaded_file.save(final_place) + + if file_ext.lower() == '.heic': + register_heif_opener() + + Image.MAX_IMAGE_PIXELS = 89478485 + + # resize if necessary + img = Image.open(final_place) + if '.' + img.format.lower() in allowed_extensions: + img = ImageOps.exif_transpose(img) + + # limit full sized version to 2000px + img.thumbnail((2000, 2000)) + img.save(final_place) + + request_json['object']['attachment'] = [{'type': 'Image', 'url': f'https://{current_app.config["SERVER_NAME"]}/{final_place.replace("app/", "")}', + 'name': form.image_alt_text.data}] elif type == 'video': - request_json['object']['attachment'] = {'type': 'Document', 'url': form.video_url.data} + request_json['object']['attachment'] = [{'type': 'Document', 'url': form.video_url.data}] elif type == 'poll': request_json['object']['type'] = 'Question' choices = [form.choice_1, form.choice_2, form.choice_3, form.choice_4, form.choice_5, @@ -656,6 +693,7 @@ def add_post(actor, type): choice_data = choice.data.strip() if choice_data: request_json['object'][key].append({'name': choice_data}) + request_json['object']['endTime'] = end_poll_date(form.finish_in.data) # todo: add try..except post = Post.new(current_user, community, request_json) @@ -1950,7 +1988,7 @@ def check_url_already_posted(): def upvote_own_post(post): post.score = 1 post.up_votes = 1 - post.ranking = post_ranking(post.score, utcnow()) + post.ranking = post.post_ranking(post.score, utcnow()) vote = PostVote(user_id=current_user.id, post_id=post.id, author_id=current_user.id, effect=1) db.session.add(vote) db.session.commit() diff --git a/app/models.py b/app/models.py index 8587e190..76d27aca 100644 --- a/app/models.py +++ b/app/models.py @@ -1130,7 +1130,7 @@ class Post(db.Model): @classmethod def new(cls, user: User, community: Community, request_json: dict, announce_id=None): - from activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \ + from app.activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \ make_image_sizes, notify_about_post from app.utils import allowlist_html, markdown_to_html, html_to_text, microblog_content_to_title, blocked_phrases, \ is_image_url, is_video_url, domain_from_url, opengraph_parse, shorten_string, remove_tracking_from_link, \ @@ -1160,7 +1160,8 @@ class Post(db.Model): score=instance_weight(user.ap_domain), instance_id=user.instance_id, indexable=user.indexable, - microblog=microblog + microblog=microblog, + posted_at=utcnow() ) if 'content' in request_json['object'] and request_json['object']['content'] is not None: @@ -1305,7 +1306,7 @@ class Post(db.Model): if is_video_hosting_site(post.url): post.type = constants.POST_TYPE_VIDEO db.session.add(post) - post.ranking = post_ranking(post.score, post.posted_at) + post.ranking = post.post_ranking(post.score, post.posted_at) community.post_count += 1 community.last_active = utcnow() user.post_count += 1 @@ -1351,7 +1352,7 @@ class Post(db.Model): if user.reputation > 100: post.up_votes += 1 post.score += 1 - post.ranking = Post.post_ranking(post.score, post.posted_at) + post.ranking = post.post_ranking(post.score, post.posted_at) db.session.commit() return post @@ -1364,17 +1365,6 @@ class Post(db.Model): td = date - self.epoch return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000) - @classmethod - def post_ranking(cls, score, date: datetime): - if date is None: - date = datetime.utcnow() - if score is None: - score = 1 - order = math.log(max(abs(score), 1), 10) - sign = 1 if score > 0 else -1 if score < 0 else 0 - seconds = Post.epoch_seconds(date) - 1685766018 - return round(sign * order + seconds / 45000, 7) - def delete_dependencies(self): db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete() db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete() From 1d77d42946b4e3b26a84b58badd94834c3010d2e Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Mon, 21 Oct 2024 09:55:39 +1300 Subject: [PATCH 4/4] post-new: do not double count --- app/community/routes.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/community/routes.py b/app/community/routes.py index b037e408..ebafeff4 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -698,10 +698,8 @@ def add_post(actor, type): # todo: add try..except post = Post.new(current_user, community, request_json) - community.post_count += 1 - current_user.post_count += 1 current_user.language_id = form.language_id.data - community.last_active = g.site.last_active = utcnow() + g.site.last_active = utcnow() post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}" db.session.commit()