From cec844ec4f053d4d1bc9cf7d1368b566f49c6c4a Mon Sep 17 00:00:00 2001
From: rimu <3310831+rimu@users.noreply.github.com>
Date: Mon, 14 Oct 2024 15:37:00 +1300
Subject: [PATCH 1/4] refactor post-new wip

---
 app/activitypub/util.py | 230 ++----------------------------------
 app/community/routes.py |  42 +++----
 app/models.py           | 256 +++++++++++++++++++++++++++++++++++++++-
 app/post/routes.py      |  19 ++-
 app/utils.py            |  45 +------
 5 files changed, 294 insertions(+), 298 deletions(-)

diff --git a/app/activitypub/util.py b/app/activitypub/util.py
index 33e71395..422bbba2 100644
--- a/app/activitypub/util.py
+++ b/app/activitypub/util.py
@@ -25,12 +25,12 @@ from io import BytesIO
 import pytesseract
 
 from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
-    is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
-    shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
-    blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \
+    is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \
+    shorten_string, remove_tracking_from_link, \
+    microblog_content_to_title, generate_image_from_video_url, is_video_url, \
     notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
-    html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \
-    moderating_communities, is_video_hosting_site
+    html_to_text, add_to_modlog_activitypub, joined_communities, \
+    moderating_communities
 
 from sqlalchemy import or_
 
@@ -1620,222 +1620,14 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
         activity_log.exception_message = 'Community is local only, post discarded'
         activity_log.result = 'ignored'
         return None
-    microblog = False
-    if 'name' not in request_json['object']:    # Microblog posts
-        if 'content' in request_json['object'] and request_json['object']['content'] is not None:
-            title = "[Microblog]"
-            microblog = True
-        else:
-            return None
-    else:
-        title = request_json['object']['name'].strip()
+    try:
+        post = Post.new(user, community, request_json, announce_id)
+    except Exception as ex:     # Post.new() raises a plain Exception, e.g. when the linked domain is banned
+        activity_log.exception_message = str(ex)
+        return None
+    if post is not None:        # None means Post.new() discarded the post without raising
+        activity_log.result = 'success'     # Post.new() has no access to the log, so record success here
 
-    nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper()
-    post = Post(user_id=user.id, community_id=community.id,
-                title=html.unescape(title),
-                comments_enabled=request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True,
-                sticky=request_json['object']['stickied'] if 'stickied' in request_json['object'] else False,
-                nsfw=request_json['object']['sensitive'] if 'sensitive' in request_json['object'] else False,
-                nsfl=request_json['object']['nsfl'] if 'nsfl' in request_json['object'] else nsfl_in_title,
-                ap_id=request_json['object']['id'],
-                ap_create_id=request_json['id'],
-                ap_announce_id=announce_id,
-                type=constants.POST_TYPE_ARTICLE,
-                up_votes=1,
-                from_bot=user.bot,
-                score=instance_weight(user.ap_domain),
-                instance_id=user.instance_id,
-                indexable=user.indexable,
-                microblog=microblog
-                )
-    if 'content' in request_json['object'] and request_json['object']['content'] is not None:
-        if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
-            post.body_html = allowlist_html(request_json['object']['content'])
-            if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown':
-                post.body = request_json['object']['source']['content']
-                post.body_html = markdown_to_html(post.body)          # prefer Markdown if provided, overwrite version obtained from HTML
-            else:
-                post.body = html_to_text(post.body_html)
-        elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown':
-            post.body = request_json['object']['content']
-            post.body_html = markdown_to_html(post.body)
-        else:
-            if not (request_json['object']['content'].startswith('<p>') or request_json['object']['content'].startswith('<blockquote>')):
-                request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
-            post.body_html = allowlist_html(request_json['object']['content'])
-            post.body = html_to_text(post.body_html)
-        if microblog:
-            autogenerated_title = microblog_content_to_title(post.body_html)
-            if len(autogenerated_title) < 20:
-                title = '[Microblog] ' + autogenerated_title.strip()
-            else:
-                title = autogenerated_title.strip()
-            if '[NSFL]' in title.upper() or '(NSFL)' in title.upper():
-                post.nsfl = True
-            if '[NSFW]' in title.upper() or '(NSFW)' in title.upper():
-                post.nsfw = True
-            post.title = title
-    # Discard post if it contains certain phrases. Good for stopping spam floods.
-    blocked_phrases_list = blocked_phrases()
-    for blocked_phrase in blocked_phrases_list:
-        if blocked_phrase in post.title:
-            return None
-    if post.body:
-        for blocked_phrase in blocked_phrases_list:
-            if blocked_phrase in post.body:
-                return None
-    if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
-            'type' in request_json['object']['attachment'][0]:
-        alt_text = None
-        if request_json['object']['attachment'][0]['type'] == 'Link':
-            post.url = request_json['object']['attachment'][0]['href']              # Lemmy < 0.19.4
-        if request_json['object']['attachment'][0]['type'] == 'Document':
-            post.url = request_json['object']['attachment'][0]['url']               # Mastodon
-            if 'name' in request_json['object']['attachment'][0]:
-                alt_text = request_json['object']['attachment'][0]['name']
-        if request_json['object']['attachment'][0]['type'] == 'Image':
-            post.url = request_json['object']['attachment'][0]['url']               # PixelFed, PieFed, Lemmy >= 0.19.4
-            if 'name' in request_json['object']['attachment'][0]:
-                alt_text = request_json['object']['attachment'][0]['name']
-        if post.url:
-            if is_image_url(post.url):
-                post.type = POST_TYPE_IMAGE
-                if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
-                    image = File(source_url=request_json['object']['image']['url'])
-                else:
-                    image = File(source_url=post.url)
-                if alt_text:
-                    image.alt_text = alt_text
-                db.session.add(image)
-                post.image = image
-            elif is_video_url(post.url):    # youtube is detected later
-                post.type = POST_TYPE_VIDEO
-                image = File(source_url=post.url)
-                db.session.add(image)
-                post.image = image
-            else:
-                post.type = POST_TYPE_LINK
-            domain = domain_from_url(post.url)
-            # notify about links to banned websites.
-            already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
-            if domain.notify_mods:
-                for community_member in post.community.moderators():
-                    notify = Notification(title='Suspicious content', url=post.ap_id,
-                                          user_id=community_member.user_id,
-                                          author_id=user.id)
-                    db.session.add(notify)
-                    already_notified.add(community_member.user_id)
-            if domain.notify_admins:
-                for admin in Site.admins():
-                    if admin.id not in already_notified:
-                        notify = Notification(title='Suspicious content',
-                                              url=post.ap_id, user_id=admin.id,
-                                              author_id=user.id)
-                        db.session.add(notify)
-            if domain.banned or domain.name.endswith('.pages.dev'):
-                post = None
-                activity_log.exception_message = domain.name + ' is blocked by admin'
-            else:
-                domain.post_count += 1
-                post.domain = domain
-
-    if post is not None:
-        if request_json['object']['type'] == 'Video':
-            post.type = POST_TYPE_VIDEO
-            post.url = request_json['object']['id']
-            if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list):
-                icon = File(source_url=request_json['object']['icon'][-1]['url'])
-                db.session.add(icon)
-                post.image = icon
-
-        # Language. Lemmy uses 'language' while Mastodon has 'contentMap'
-        if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
-            language = find_language_or_create(request_json['object']['language']['identifier'],
-                                               request_json['object']['language']['name'])
-            post.language_id = language.id
-        elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
-            language = find_language(next(iter(request_json['object']['contentMap'])))
-            post.language_id = language.id if language else None
-        if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
-            for json_tag in request_json['object']['tag']:
-                if json_tag and json_tag['type'] == 'Hashtag':
-                    if json_tag['name'][1:].lower() != community.name.lower():             # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
-                        hashtag = find_hashtag_or_create(json_tag['name'])
-                        if hashtag:
-                            post.tags.append(hashtag)
-        if 'image' in request_json['object'] and post.image is None:
-            image = File(source_url=request_json['object']['image']['url'])
-            db.session.add(image)
-            post.image = image
-        if post.image is None and post.type == POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image
-            # Let's see if we can do better than the source instance did!
-            tn_url = post.url
-            if tn_url[:32] == 'https://www.youtube.com/watch?v=':
-                tn_url = 'https://youtu.be/' + tn_url[32:43]            # better chance of thumbnail from youtu.be than youtube.com
-            opengraph = opengraph_parse(tn_url)
-            if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
-                filename = opengraph.get('og:image') or opengraph.get('og:image:url')
-                if not filename.startswith('/'):
-                    file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
-                    post.image = file
-                    db.session.add(file)
-
-        if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public':
-            post.indexable = False
-
-        if post.url:
-            post.url = remove_tracking_from_link(post.url)      # moved here as changes youtu.be to youtube.com
-            if is_video_hosting_site(post.url):
-                post.type = POST_TYPE_VIDEO
-        db.session.add(post)
-        post.ranking = post_ranking(post.score, post.posted_at)
-        community.post_count += 1
-        community.last_active = utcnow()
-        activity_log.result = 'success'
-        user.post_count += 1
-        db.session.commit()
-
-        # Polls need to be processed quite late because they need a post_id to refer to
-        if request_json['object']['type'] == 'Question':
-            post.type = POST_TYPE_POLL
-            mode = 'single'
-            if 'anyOf' in request_json['object']:
-                mode = 'multiple'
-            poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False)
-            db.session.add(poll)
-            i = 1
-            for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']:
-                new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i)
-                db.session.add(new_choice)
-                i += 1
-            db.session.commit()
-
-        if post.image_id:
-            make_image_sizes(post.image_id, 170, 512, 'posts', community.low_quality)  # the 512 sized image is for masonry view
-
-        # Update list of cross posts
-        if post.url:
-            other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
-                                    Post.posted_at > post.posted_at - timedelta(days=6)).all()
-            for op in other_posts:
-                if op.cross_posts is None:
-                    op.cross_posts = [post.id]
-                else:
-                    op.cross_posts.append(post.id)
-                if post.cross_posts is None:
-                    post.cross_posts = [op.id]
-                else:
-                    post.cross_posts.append(op.id)
-            db.session.commit()
-
-        if post.community_id not in communities_banned_from(user.id):
-            notify_about_post(post)
-
-        if user.reputation > 100:
-            post.up_votes += 1
-            post.score += 1
-            post.ranking = post_ranking(post.score, post.posted_at)
-            db.session.commit()
     return post
 
 
diff --git a/app/community/routes.py b/app/community/routes.py
index ba80efe8..89857667 100644
--- a/app/community/routes.py
+++ b/app/community/routes.py
@@ -623,37 +623,31 @@ def add_post(actor, type):
         community = Community.query.get_or_404(form.communities.data)
         if not can_create_post(current_user, community):
             abort(401)
-        post = Post(user_id=current_user.id, community_id=form.communities.data, instance_id=1)
-        save_post(form, post, post_type)
+
+        request_json = {
+            'id': None,
+            'object': {
+                'type': 'Page',     # Post.new() only special-cases 'Video' and 'Question'; anything else is a plain post
+                'name': form.title.data,
+                'stickied': form.sticky.data,       # Post.new() reads 'stickied', not 'sticky'
+                'sensitive': form.nsfw.data,        # Post.new() reads 'sensitive', not 'nsfw'
+                'nsfl': form.nsfl.data,
+                'id': gibberish(),   # this will be updated once we have the post.id
+                'mediaType': 'text/markdown',
+                'content': form.body.data,
+            }
+        }
+        # todo: add try..except - Post.new() can raise, can return None, and already increments both post_count values
+        post = Post.new(current_user, community, request_json)
         community.post_count += 1
         current_user.post_count += 1
         community.last_active = g.site.last_active = utcnow()
-        db.session.commit()
         post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}"
         db.session.commit()
 
-        if post.image_id and post.image.file_path is None:
-            make_image_sizes(post.image_id, 170, 512, 'posts')  # the 512 sized image is for masonry view
-
-        # Update list of cross posts
-        if post.url:
-            other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
-                                    Post.posted_at > post.posted_at - timedelta(days=6)).all()
-            for op in other_posts:
-                if op.cross_posts is None:
-                    op.cross_posts = [post.id]
-                else:
-                    op.cross_posts.append(post.id)
-                if post.cross_posts is None:
-                    post.cross_posts = [op.id]
-                else:
-                    post.cross_posts.append(op.id)
-            db.session.commit()
-
         upvote_own_post(post)
-        notify_about_post(post)
 
-        if post_type == POST_TYPE_POLL:
+        if post.type == POST_TYPE_POLL:
             poll = Poll.query.filter_by(post_id=post.id).first()
             if not poll.local_only:
                 federate_post_to_user_followers(post)
@@ -665,7 +659,7 @@ def add_post(actor, type):
                 federate_post(community, post)
 
         return redirect(f"/post/{post.id}")
-    else:
+    else: # GET
         form.communities.data = community.id
         form.notify_author.data = True
         if post_type == POST_TYPE_POLL:
diff --git a/app/models.py b/app/models.py
index 0e649eb0..88de3df8 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1,6 +1,7 @@
+import html
 from datetime import datetime, timedelta, date, timezone
 from time import time
-from typing import List, Union
+from typing import List, Union, Type
 from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
 
 import arrow
@@ -15,7 +16,7 @@ from sqlalchemy.dialects.postgresql import ARRAY
 from sqlalchemy.ext.mutable import MutableList
 from flask_sqlalchemy import BaseQuery
 from sqlalchemy_searchable import SearchQueryMixin
-from app import db, login, cache, celery, httpx_client
+from app import db, login, cache, celery, httpx_client, constants
 import jwt
 import os
 import math
@@ -1074,7 +1075,7 @@ class Post(db.Model):
     url = db.Column(db.String(2048))
     body = db.Column(db.Text)
     body_html = db.Column(db.Text)
-    type = db.Column(db.Integer)
+    type = db.Column(db.Integer, default=constants.POST_TYPE_ARTICLE)
     microblog = db.Column(db.Boolean, default=False)
     comments_enabled = db.Column(db.Boolean, default=True)
     deleted = db.Column(db.Boolean, default=False, index=True)
@@ -1127,6 +1128,253 @@ class Post(db.Model):
     def get_by_ap_id(cls, ap_id):
         return cls.query.filter_by(ap_id=ap_id).first()
 
+    @classmethod
+    def new(cls, user: User, community: Community, request_json: dict, announce_id=None):
+        """Build a Post from an ActivityPub-style Create payload (read from request_json['object']), save it, return it.
+
+        Returns None when there is no 'name' and no 'content', or when the title or body contains a blocked phrase.
+        Raises Exception when the attachment's domain is banned or ends with .pages.dev. Commits the db session."""
+        from app.activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \
+            make_image_sizes, notify_about_post
+        from app.utils import allowlist_html, markdown_to_html, html_to_text, microblog_content_to_title, blocked_phrases, \
+            is_image_url, is_video_url, domain_from_url, opengraph_parse, shorten_string, remove_tracking_from_link, \
+            is_video_hosting_site, post_ranking, communities_banned_from
+
+        microblog = False
+        if 'name' not in request_json['object']:  # Microblog posts
+            if 'content' in request_json['object'] and request_json['object']['content'] is not None:
+                title = "[Microblog]"
+                microblog = True
+            else:
+                return None
+        else:
+            title = request_json['object']['name'].strip()
+        nsfl_in_title = '[NSFL]' in title.upper() or '(NSFL)' in title.upper()
+        post = Post(user_id=user.id, community_id=community.id,
+                    title=html.unescape(title),
+                    comments_enabled=request_json['object'].get('commentsEnabled', True),
+                    sticky=request_json['object'].get('stickied', False),
+                    nsfw=request_json['object'].get('sensitive', False),
+                    nsfl=request_json['object'].get('nsfl', nsfl_in_title),
+                    ap_id=request_json['object']['id'],
+                    ap_create_id=request_json['id'],
+                    ap_announce_id=announce_id,
+                    up_votes=1,
+                    from_bot=user.bot,
+                    score=instance_weight(user.ap_domain),
+                    instance_id=user.instance_id,
+                    indexable=user.indexable,
+                    microblog=microblog
+                    )
+
+        if 'content' in request_json['object'] and request_json['object']['content'] is not None:
+            if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
+                post.body_html = allowlist_html(request_json['object']['content'])
+                if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown':
+                    post.body = request_json['object']['source']['content']
+                    post.body_html = markdown_to_html(post.body)  # prefer Markdown if provided, overwrite version obtained from HTML
+                else:
+                    post.body = html_to_text(post.body_html)
+            elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown':
+                post.body = request_json['object']['content']
+                post.body_html = markdown_to_html(post.body)
+            else:
+                if not (request_json['object']['content'].startswith('<p>') or request_json['object']['content'].startswith('<blockquote>')):
+                    request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
+                post.body_html = allowlist_html(request_json['object']['content'])
+                post.body = html_to_text(post.body_html)
+            if microblog:
+                autogenerated_title = microblog_content_to_title(post.body_html)
+                if len(autogenerated_title) < 20:
+                    title = '[Microblog] ' + autogenerated_title.strip()
+                else:
+                    title = autogenerated_title.strip()
+                if '[NSFL]' in title.upper() or '(NSFL)' in title.upper():
+                    post.nsfl = True
+                if '[NSFW]' in title.upper() or '(NSFW)' in title.upper():
+                    post.nsfw = True
+                post.title = title
+        # Discard post if it contains certain phrases. Good for stopping spam floods.
+        blocked_phrases_list = blocked_phrases()
+        for blocked_phrase in blocked_phrases_list:
+            if blocked_phrase in post.title:
+                return None
+        if post.body:
+            for blocked_phrase in blocked_phrases_list:
+                if blocked_phrase in post.body:
+                    return None
+
+        if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
+                'type' in request_json['object']['attachment'][0]:
+            alt_text = None
+            if request_json['object']['attachment'][0]['type'] == 'Link':
+                post.url = request_json['object']['attachment'][0]['href']  # Lemmy < 0.19.4
+            if request_json['object']['attachment'][0]['type'] == 'Document':
+                post.url = request_json['object']['attachment'][0]['url']  # Mastodon
+                if 'name' in request_json['object']['attachment'][0]:
+                    alt_text = request_json['object']['attachment'][0]['name']
+            if request_json['object']['attachment'][0]['type'] == 'Image':
+                post.url = request_json['object']['attachment'][0]['url']  # PixelFed, PieFed, Lemmy >= 0.19.4
+                if 'name' in request_json['object']['attachment'][0]:
+                    alt_text = request_json['object']['attachment'][0]['name']
+            if post.url:
+                if is_image_url(post.url):
+                    post.type = constants.POST_TYPE_IMAGE
+                    if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
+                        image = File(source_url=request_json['object']['image']['url'])
+                    else:
+                        image = File(source_url=post.url)
+                    if alt_text:
+                        image.alt_text = alt_text
+                    db.session.add(image)
+                    post.image = image
+                elif is_video_url(post.url):  # youtube is detected later
+                    post.type = constants.POST_TYPE_VIDEO
+                    image = File(source_url=post.url)
+                    db.session.add(image)
+                    post.image = image
+                else:
+                    post.type = constants.POST_TYPE_LINK
+                domain = domain_from_url(post.url)
+                # notify about links to banned websites.
+                already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
+                if domain.notify_mods:
+                    for community_member in post.community.moderators():
+                        notify = Notification(title='Suspicious content', url=post.ap_id,
+                                              user_id=community_member.user_id,
+                                              author_id=user.id)
+                        db.session.add(notify)
+                        already_notified.add(community_member.user_id)
+                if domain.notify_admins:
+                    for admin in Site.admins():
+                        if admin.id not in already_notified:
+                            notify = Notification(title='Suspicious content',
+                                                  url=post.ap_id, user_id=admin.id,
+                                                  author_id=user.id)
+                            db.session.add(notify)
+                if domain.banned or domain.name.endswith('.pages.dev'):
+                    raise Exception(domain.name + ' is blocked by admin')
+                else:
+                    domain.post_count += 1
+                    post.domain = domain
+
+        if request_json['object'].get('type') == 'Video':  # .get() because locally built payloads may omit 'type'
+            post.type = constants.POST_TYPE_VIDEO
+            post.url = request_json['object']['id']
+            if 'icon' in request_json['object'] and isinstance(request_json['object']['icon'], list):
+                icon = File(source_url=request_json['object']['icon'][-1]['url'])
+                db.session.add(icon)
+                post.image = icon
+
+        # Language. Lemmy uses 'language' while Mastodon has 'contentMap'
+        if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
+            language = find_language_or_create(request_json['object']['language']['identifier'],
+                                               request_json['object']['language']['name'])
+            post.language_id = language.id
+        elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
+            language = find_language(next(iter(request_json['object']['contentMap'])))
+            post.language_id = language.id if language else None
+        if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
+            for json_tag in request_json['object']['tag']:
+                if json_tag and json_tag['type'] == 'Hashtag':
+                    if json_tag['name'][1:].lower() != community.name.lower():  # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
+                        hashtag = find_hashtag_or_create(json_tag['name'])
+                        if hashtag:
+                            post.tags.append(hashtag)
+        if 'image' in request_json['object'] and post.image is None:
+            image = File(source_url=request_json['object']['image']['url'])
+            db.session.add(image)
+            post.image = image
+        if post.image is None and post.type == constants.POST_TYPE_LINK:  # This is a link post but the source instance has not provided a thumbnail image
+            # Let's see if we can do better than the source instance did!
+            tn_url = post.url
+            if tn_url[:32] == 'https://www.youtube.com/watch?v=':
+                tn_url = 'https://youtu.be/' + tn_url[32:43]  # better chance of thumbnail from youtu.be than youtube.com
+            opengraph = opengraph_parse(tn_url)
+            if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
+                filename = opengraph.get('og:image') or opengraph.get('og:image:url')
+                if not filename.startswith('/'):
+                    file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
+                    post.image = file
+                    db.session.add(file)
+
+        if 'searchableBy' in request_json['object'] and request_json['object']['searchableBy'] != 'https://www.w3.org/ns/activitystreams#Public':
+            post.indexable = False
+
+        if post.url:
+            post.url = remove_tracking_from_link(post.url)  # moved here as changes youtu.be to youtube.com
+            if is_video_hosting_site(post.url):
+                post.type = constants.POST_TYPE_VIDEO
+        db.session.add(post)
+        post.ranking = post_ranking(post.score, post.posted_at)
+        community.post_count += 1
+        community.last_active = utcnow()
+        user.post_count += 1
+        db.session.commit()
+
+        # Polls need to be processed quite late because they need a post_id to refer to
+        if request_json['object'].get('type') == 'Question':
+            post.type = constants.POST_TYPE_POLL
+            mode = 'single'
+            if 'anyOf' in request_json['object']:
+                mode = 'multiple'
+            poll = Poll(post_id=post.id, end_poll=request_json['object']['endTime'], mode=mode, local_only=False)
+            db.session.add(poll)
+            i = 1
+            for choice_ap in request_json['object']['oneOf' if mode == 'single' else 'anyOf']:
+                new_choice = PollChoice(post_id=post.id, choice_text=choice_ap['name'], sort_order=i)
+                db.session.add(new_choice)
+                i += 1
+            db.session.commit()
+
+        if post.image_id:
+            make_image_sizes(post.image_id, 170, 512, 'posts', community.low_quality)  # the 512 sized image is for masonry view
+
+        # Update list of cross posts
+        if post.url:
+            other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
+                                            Post.posted_at > post.posted_at - timedelta(days=6)).all()
+            for op in other_posts:
+                if op.cross_posts is None:
+                    op.cross_posts = [post.id]
+                else:
+                    op.cross_posts.append(post.id)
+                if post.cross_posts is None:
+                    post.cross_posts = [op.id]
+                else:
+                    post.cross_posts.append(op.id)
+            db.session.commit()
+
+        if post.community_id not in communities_banned_from(user.id):
+            notify_about_post(post)
+
+        if user.reputation > 100:
+            post.up_votes += 1
+            post.score += 1
+            post.ranking = Post.post_ranking(post.score, post.posted_at)
+            db.session.commit()
+
+        return post
+
+    # All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
+    epoch = datetime(1970, 1, 1)
+
+    @classmethod
+    def epoch_seconds(cls, date):   # returns the seconds between cls.epoch and date, as a float
+        td = date - cls.epoch       # cls.epoch is a naive datetime, so date has to be naive as well
+        return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
+
+    @classmethod
+    def post_ranking(cls, score, date: datetime):
+        """Return the ranking value: signed log10 of the score plus an age term, rounded to 7 decimal places."""
+        when = datetime.utcnow() if date is None else date      # no date given: rank as if posted right now
+        points = 1 if score is None else score
+        magnitude = math.log(max(abs(points), 1), 10)           # scores between -1 and 1 all give 0
+        direction = (points > 0) - (points < 0)                 # 1, -1 or 0
+        # 1685766018 is a fixed offset taken off the unix timestamp (presumably the project's time origin - confirm)
+        elapsed = Post.epoch_seconds(when) - 1685766018
+        return round(direction * magnitude + elapsed / 45000, 7)
+
     def delete_dependencies(self):
         db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete()
         db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete()
@@ -1420,7 +1668,7 @@ class PostReply(db.Model):
             raise Exception('Gif comment ignored')
 
         if reply_is_stupid(reply.body):
-            raise Exception('Stupid reply')
+            raise Exception('Low quality reply')
 
         db.session.add(reply)
         db.session.commit()
diff --git a/app/post/routes.py b/app/post/routes.py
index 47f5fc77..f58d2b6e 100644
--- a/app/post/routes.py
+++ b/app/post/routes.py
@@ -28,7 +28,7 @@ from app.post import bp
 from app.utils import get_setting, render_template, allowlist_html, markdown_to_html, validation_required, \
     shorten_string, markdown_to_text, gibberish, ap_datetime, return_304, \
     request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
-    reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, moderating_communities, joined_communities, \
+    reply_already_exists, reply_is_just_link_to_gif_reaction, moderating_communities, joined_communities, \
     blocked_instances, blocked_domains, community_moderators, blocked_phrases, show_ban_message, recently_upvoted_posts, \
     recently_downvoted_posts, recently_upvoted_post_replies, recently_downvoted_post_replies, reply_is_stupid, \
     languages_for_form, menu_topics, add_to_modlog, blocked_communities, piefed_markdown_to_lemmy_markdown, \
@@ -546,11 +546,18 @@ def add_reply(post_id: int, comment_id: int):
         current_user.ip_address = ip_address()
         current_user.language_id = form.language_id.data
 
-        reply = PostReply.new(current_user, post, in_reply_to,
-                              body=piefed_markdown_to_lemmy_markdown(form.body.data),
-                              body_html=markdown_to_html(form.body.data),
-                              notify_author=form.notify_author.data,
-                              language_id=form.language_id.data)
+        try:
+            reply = PostReply.new(current_user, post, in_reply_to,
+                                  body=piefed_markdown_to_lemmy_markdown(form.body.data),
+                                  body_html=markdown_to_html(form.body.data),
+                                  notify_author=form.notify_author.data,
+                                  language_id=form.language_id.data)
+        except Exception as ex:
+            flash(_('Your reply was not accepted because %(reason)s', reason=str(ex)), 'error')
+            if in_reply_to.depth <= constants.THREAD_CUTOFF_DEPTH:
+                return redirect(url_for('activitypub.post_ap', post_id=post_id, _anchor=f'comment_{in_reply_to.id}'))
+            else:
+                return redirect(url_for('post.continue_discussion', post_id=post_id, comment_id=in_reply_to.parent_id))
 
         form.body.data = ''
         flash('Your comment has been added.')
diff --git a/app/utils.py b/app/utils.py
index 29fc7c7c..d24357d2 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -898,51 +898,8 @@ def topic_tree() -> List:
     return [topic for topic in topics_dict.values() if topic['topic'].parent_id is None]
 
 
-# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
-epoch = datetime(1970, 1, 1)
-
-def epoch_seconds(date):
-    td = date - epoch
-    return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
-
-
 def post_ranking(score, date: datetime):
-    if date is None:
-        date = datetime.utcnow()
-    if score is None:
-        score = 1
-    order = math.log(max(abs(score), 1), 10)
-    sign = 1 if score > 0 else -1 if score < 0 else 0
-    seconds = epoch_seconds(date) - 1685766018
-    return round(sign * order + seconds / 45000, 7)
-
-
-# used for ranking comments
-def _confidence(ups, downs):
-    n = ups + downs
-
-    if n == 0:
-        return 0.0
-
-    z = 1.281551565545
-    p = float(ups) / n
-
-    left = p + 1 / (2 * n) * z * z
-    right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
-    under = 1 + 1 / n * z * z
-
-    return (left - right) / under
-
-
-def confidence(ups, downs) -> float:
-    if ups is None or ups < 0:
-        ups = 0
-    if downs is None or downs < 0:
-        downs = 0
-    if ups + downs == 0:
-        return 0.0
-    else:
-        return _confidence(ups, downs)
+    return Post.post_ranking(score, date)
 
 
 def opengraph_parse(url):

From feca5992afd6fb0a88c0b6c8d0ac6b3ea06b36b6 Mon Sep 17 00:00:00 2001
From: rimu <3310831+rimu@users.noreply.github.com>
Date: Wed, 16 Oct 2024 21:42:30 +1300
Subject: [PATCH 2/4] refactor post-new wip

---
 app/community/routes.py | 24 +++++++++++++++++++++++-
 app/community/util.py   |  4 ++--
 app/models.py           |  6 +++---
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/app/community/routes.py b/app/community/routes.py
index 89857667..0e4811e3 100644
--- a/app/community/routes.py
+++ b/app/community/routes.py
@@ -21,7 +21,7 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre
     EditCommunityWikiPageForm
 from app.community.util import search_for_community, actor_to_community, \
     save_post, save_icon_file, save_banner_file, send_to_remote_instance, \
-    delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users
+    delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string
 from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \
     SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \
     REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS
@@ -624,6 +624,8 @@ def add_post(actor, type):
         if not can_create_post(current_user, community):
             abort(401)
 
+        language = Language.query.get(form.language_id.data)
+
         request_json = {
             'id': None,
             'object': {
@@ -634,13 +636,33 @@ def add_post(actor, type):
                 'id': gibberish(),   # this will  be updated once we have the post.id
                 'mediaType': 'text/markdown',
                 'content': form.body.data,
+                'tag': tags_from_string(form.tags.data),
+                'language': {'identifier': language.code, 'name': language.name}
             }
         }
+        if type == 'link':
+            request_json['object']['attachment'] = {'type': 'Link', 'href': form.link_url.data}
+        elif type == 'image':
+            request_json['object']['attachment'] = {'type': 'Image', 'url': image_url, 'name': form.image_alt_text}
+        elif type == 'video':
+            request_json['object']['attachment'] = {'type': 'Document', 'url': form.video_url.data}
+        elif type == 'poll':
+            request_json['object']['type'] = 'Question'
+            choices = [form.choice_1, form.choice_2, form.choice_3, form.choice_4, form.choice_5,
+                       form.choice_6, form.choice_7, form.choice_8, form.choice_9, form.choice_10]
+            key = 'oneOf' if form.mode.data == 'single' else 'anyOf'
+            request_json['object'][key] = []
+            for choice in choices:
+                choice_data = choice.data.strip()
+                if choice_data:
+                    request_json['object'][key].append({'name': choice_data})
+
         # todo: add try..except
         post = Post.new(current_user, community, request_json)
 
         community.post_count += 1
         current_user.post_count += 1
+        current_user.language_id = form.language_id.data
         community.last_active = g.site.last_active = utcnow()
         post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}"
         db.session.commit()
diff --git a/app/community/util.py b/app/community/util.py
index b865d48c..59376fd7 100644
--- a/app/community/util.py
+++ b/app/community/util.py
@@ -484,7 +484,7 @@ def end_poll_date(end_choice):
         raise ValueError("Invalid choice")
 
 
-def tags_from_string(tags: str) -> List[Tag]:
+def tags_from_string(tags: str) -> List[dict]:
     return_value = []
     tags = tags.strip()
     if tags == '':
@@ -496,7 +496,7 @@ def tags_from_string(tags: str) -> List[Tag]:
             tag = tag[1:]
         tag_to_append = find_hashtag_or_create(tag)
         if tag_to_append:
-            return_value.append(tag_to_append)
+            return_value.append({'type': 'Hashtag', 'name': tag_to_append.name})
     return return_value
 
 
diff --git a/app/models.py b/app/models.py
index 88de3df8..8587e190 100644
--- a/app/models.py
+++ b/app/models.py
@@ -635,7 +635,7 @@ class User(UserMixin, db.Model):
     verification_token = db.Column(db.String(16), index=True)
     banned = db.Column(db.Boolean, default=False)
     deleted = db.Column(db.Boolean, default=False)
-    deleted_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
+    deleted_by = db.Column(db.Integer, index=True)
     about = db.Column(db.Text)      # markdown
     about_html = db.Column(db.Text) # html
     keywords = db.Column(db.String(256))
@@ -1079,7 +1079,7 @@ class Post(db.Model):
     microblog = db.Column(db.Boolean, default=False)
     comments_enabled = db.Column(db.Boolean, default=True)
     deleted = db.Column(db.Boolean, default=False, index=True)
-    deleted_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
+    deleted_by = db.Column(db.Integer, index=True)
     mea_culpa = db.Column(db.Boolean, default=False)
     has_embed = db.Column(db.Boolean, default=False)
     reply_count = db.Column(db.Integer, default=0)
@@ -1602,7 +1602,7 @@ class PostReply(db.Model):
     created_at = db.Column(db.DateTime, index=True, default=utcnow)
     posted_at = db.Column(db.DateTime, index=True, default=utcnow)
     deleted = db.Column(db.Boolean, default=False, index=True)
-    deleted_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
+    deleted_by = db.Column(db.Integer, index=True)
     ip = db.Column(db.String(50))
     from_bot = db.Column(db.Boolean, default=False)
     up_votes = db.Column(db.Integer, default=0)

From 518f165c1f2363b7c9b8c9d41ef7339dd1e4c0db Mon Sep 17 00:00:00 2001
From: rimu <3310831+rimu@users.noreply.github.com>
Date: Sun, 20 Oct 2024 20:21:30 +1300
Subject: [PATCH 3/4] post-new: image posts

---
 app/community/routes.py | 52 +++++++++++++++++++++++++++++++++++------
 app/models.py           | 20 ++++------------
 2 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/app/community/routes.py b/app/community/routes.py
index 0e4811e3..b037e408 100644
--- a/app/community/routes.py
+++ b/app/community/routes.py
@@ -1,12 +1,16 @@
+import base64
+import os
 from collections import namedtuple
 from io import BytesIO
 from random import randint
 
 import flask
+from PIL import Image, ImageOps
 from flask import redirect, url_for, flash, request, make_response, session, Markup, current_app, abort, g, json, \
     jsonify
 from flask_login import current_user, login_required
 from flask_babel import _
+from pillow_heif import register_heif_opener
 from slugify import slugify
 from sqlalchemy import or_, desc, text
 
@@ -21,7 +25,8 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre
     EditCommunityWikiPageForm
 from app.community.util import search_for_community, actor_to_community, \
     save_post, save_icon_file, save_banner_file, send_to_remote_instance, \
-    delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string
+    delete_post_from_community, delete_post_reply_from_community, community_in_list, find_local_users, tags_from_string, \
+    allowed_extensions, end_poll_date
 from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \
     SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \
     REPORT_STATE_DISCARDED, POST_TYPE_VIDEO, NOTIF_COMMUNITY, POST_TYPE_POLL, MICROBLOG_APPS
@@ -38,7 +43,7 @@ from app.utils import get_setting, render_template, allowlist_html, markdown_to_
     joined_communities, moderating_communities, blocked_domains, mimetype_from_url, blocked_instances, \
     community_moderators, communities_banned_from, show_ban_message, recently_upvoted_posts, recently_downvoted_posts, \
     blocked_users, post_ranking, languages_for_form, english_language_id, menu_topics, add_to_modlog, \
-    blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown
+    blocked_communities, remove_tracking_from_link, piefed_markdown_to_lemmy_markdown, ensure_directory_exists
 from feedgen.feed import FeedGenerator
 from datetime import timezone, timedelta
 from copy import copy
@@ -581,8 +586,8 @@ def add_post(actor, type):
         return show_ban_message()
     community = actor_to_community(actor)
 
+    post_type = POST_TYPE_ARTICLE
     if type == 'discussion':
-        post_type = POST_TYPE_ARTICLE
         form = CreateDiscussionForm()
     elif type == 'link':
         post_type = POST_TYPE_LINK
@@ -630,6 +635,7 @@ def add_post(actor, type):
             'id': None,
             'object': {
                 'name': form.title.data,
+                'type': 'Page',
                 'sticky': form.sticky.data,
                 'nsfw': form.nsfw.data,
                 'nsfl': form.nsfl.data,
@@ -641,11 +647,42 @@ def add_post(actor, type):
             }
         }
         if type == 'link':
-            request_json['object']['attachment'] = {'type': 'Link', 'href': form.link_url.data}
+            request_json['object']['attachment'] = [{'type': 'Link', 'href': form.link_url.data}]
         elif type == 'image':
-            request_json['object']['attachment'] = {'type': 'Image', 'url': image_url, 'name': form.image_alt_text}
+            uploaded_file = request.files['image_file']
+            if uploaded_file and uploaded_file.filename != '':
+                # check if this is an allowed type of file
+                file_ext = os.path.splitext(uploaded_file.filename)[1]
+                if file_ext.lower() not in allowed_extensions:
+                    abort(400, description="Invalid image type.")
+
+                new_filename = gibberish(15)
+                # set up the storage directory
+                directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
+                ensure_directory_exists(directory)
+
+                final_place = os.path.join(directory, new_filename + file_ext)
+                uploaded_file.seek(0)
+                uploaded_file.save(final_place)
+
+                if file_ext.lower() == '.heic':
+                    register_heif_opener()
+
+                Image.MAX_IMAGE_PIXELS = 89478485
+
+                # resize if necessary
+                img = Image.open(final_place)
+                if '.' + img.format.lower() in allowed_extensions:
+                    img = ImageOps.exif_transpose(img)
+
+                    # limit full sized version to 2000px
+                    img.thumbnail((2000, 2000))
+                    img.save(final_place)
+
+                request_json['object']['attachment'] = [{'type': 'Image', 'url': f'https://{current_app.config["SERVER_NAME"]}/{final_place.replace("app/", "")}',
+                                                        'name': form.image_alt_text.data}]
         elif type == 'video':
-            request_json['object']['attachment'] = {'type': 'Document', 'url': form.video_url.data}
+            request_json['object']['attachment'] = [{'type': 'Document', 'url': form.video_url.data}]
         elif type == 'poll':
             request_json['object']['type'] = 'Question'
             choices = [form.choice_1, form.choice_2, form.choice_3, form.choice_4, form.choice_5,
@@ -656,6 +693,7 @@ def add_post(actor, type):
                 choice_data = choice.data.strip()
                 if choice_data:
                     request_json['object'][key].append({'name': choice_data})
+            request_json['object']['endTime'] = end_poll_date(form.finish_in.data)
 
         # todo: add try..except
         post = Post.new(current_user, community, request_json)
@@ -1950,7 +1988,7 @@ def check_url_already_posted():
 def upvote_own_post(post):
         post.score = 1
         post.up_votes = 1
-        post.ranking = post_ranking(post.score, utcnow())
+        post.ranking = post.post_ranking(post.score, utcnow())
         vote = PostVote(user_id=current_user.id, post_id=post.id, author_id=current_user.id, effect=1)
         db.session.add(vote)
         db.session.commit()
diff --git a/app/models.py b/app/models.py
index 8587e190..76d27aca 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1130,7 +1130,7 @@ class Post(db.Model):
 
     @classmethod
     def new(cls, user: User, community: Community, request_json: dict, announce_id=None):
-        from activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \
+        from app.activitypub.util import instance_weight, find_language_or_create, find_language, find_hashtag_or_create, \
             make_image_sizes, notify_about_post
         from app.utils import allowlist_html, markdown_to_html, html_to_text, microblog_content_to_title, blocked_phrases, \
             is_image_url, is_video_url, domain_from_url, opengraph_parse, shorten_string, remove_tracking_from_link, \
@@ -1160,7 +1160,8 @@ class Post(db.Model):
                     score=instance_weight(user.ap_domain),
                     instance_id=user.instance_id,
                     indexable=user.indexable,
-                    microblog=microblog
+                    microblog=microblog,
+                    posted_at=utcnow()
                     )
 
         if 'content' in request_json['object'] and request_json['object']['content'] is not None:
@@ -1305,7 +1306,7 @@ class Post(db.Model):
                 if is_video_hosting_site(post.url):
                     post.type = constants.POST_TYPE_VIDEO
             db.session.add(post)
-            post.ranking = post_ranking(post.score, post.posted_at)
+            post.ranking = post.post_ranking(post.score, post.posted_at)
             community.post_count += 1
             community.last_active = utcnow()
             user.post_count += 1
@@ -1351,7 +1352,7 @@ class Post(db.Model):
             if user.reputation > 100:
                 post.up_votes += 1
                 post.score += 1
-                post.ranking = Post.post_ranking(post.score, post.posted_at)
+                post.ranking = post.post_ranking(post.score, post.posted_at)
                 db.session.commit()
 
         return post
@@ -1364,17 +1365,6 @@ class Post(db.Model):
         td = date - self.epoch
         return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
 
-    @classmethod
-    def post_ranking(cls, score, date: datetime):
-        if date is None:
-            date = datetime.utcnow()
-        if score is None:
-            score = 1
-        order = math.log(max(abs(score), 1), 10)
-        sign = 1 if score > 0 else -1 if score < 0 else 0
-        seconds = Post.epoch_seconds(date) - 1685766018
-        return round(sign * order + seconds / 45000, 7)
-
     def delete_dependencies(self):
         db.session.query(PostBookmark).filter(PostBookmark.post_id == self.id).delete()
         db.session.query(PollChoiceVote).filter(PollChoiceVote.post_id == self.id).delete()

From 1d77d42946b4e3b26a84b58badd94834c3010d2e Mon Sep 17 00:00:00 2001
From: rimu <3310831+rimu@users.noreply.github.com>
Date: Mon, 21 Oct 2024 09:55:39 +1300
Subject: [PATCH 4/4] post-new: do not double count

---
 app/community/routes.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/community/routes.py b/app/community/routes.py
index b037e408..ebafeff4 100644
--- a/app/community/routes.py
+++ b/app/community/routes.py
@@ -698,10 +698,8 @@ def add_post(actor, type):
         # todo: add try..except
         post = Post.new(current_user, community, request_json)
 
-        community.post_count += 1
-        current_user.post_count += 1
         current_user.language_id = form.language_id.data
-        community.last_active = g.site.last_active = utcnow()
+        g.site.last_active = utcnow()
         post.ap_id = f"https://{current_app.config['SERVER_NAME']}/post/{post.id}"
         db.session.commit()