rework update_post_from_activity #351

freamon 2024-11-27 15:29:22 +00:00
parent e28550427b
commit 01e28a20b5

@@ -32,7 +32,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar
     microblog_content_to_title, generate_image_from_video_url, is_video_url, \
     notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
     html_to_text, add_to_modlog_activitypub, joined_communities, \
-    moderating_communities, get_task_session
+    moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse
 from sqlalchemy import or_
@@ -1753,13 +1753,7 @@ def update_post_reply_from_activity(reply: PostReply, request_json: dict):
 def update_post_from_activity(post: Post, request_json: dict):
-    if 'name' not in request_json['object']:  # Microblog posts
-        name = "[Microblog]"
-    else:
-        name = request_json['object']['name']
-    nsfl_in_title = '[NSFL]' in name.upper() or '(NSFL)' in name.upper()
-    post.title = name
+    # redo body without checking if it's changed
     if 'content' in request_json['object'] and request_json['object']['content'] is not None:
         if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
             post.body_html = allowlist_html(request_json['object']['content'])
@@ -1776,90 +1770,138 @@ def update_post_from_activity(post: Post, request_json: dict):
                 request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
             post.body_html = allowlist_html(request_json['object']['content'])
         post.body = html_to_text(post.body_html)
-        if name == "[Microblog]":
-            autogenerated_title = microblog_content_to_title(post.body_html)
-            if len(autogenerated_title) < 20:
-                name += ' ' + autogenerated_title
-            else:
-                name = autogenerated_title
-            nsfl_in_title = '[NSFL]' in name.upper() or '(NSFL)' in name.upper()
-            post.title = name
+
+    # title
+    old_title = post.title
+    if 'name' in request_json['object']:
+        new_title = request_json['object']['name']
+        post.microblog = False
+    else:
+        autogenerated_title = microblog_content_to_title(post.body_html)
+        if len(autogenerated_title) < 20:
+            new_title = '[Microblog] ' + autogenerated_title.strip()
+        else:
+            new_title = autogenerated_title.strip()
+        post.microblog = True
+
+    if old_title != new_title:
+        post.title = new_title
+        if '[NSFL]' in new_title.upper() or '(NSFL)' in new_title.upper():
+            post.nsfl = True
+        if '[NSFW]' in new_title.upper() or '(NSFW)' in new_title.upper():
+            post.nsfw = True
+    if 'sensitive' in request_json['object']:
+        post.nsfw = request_json['object']['sensitive']
+    if 'nsfl' in request_json['object']:
+        post.nsfl = request_json['object']['nsfl']
 
     # Language
+    old_language_id = post.language_id
+    new_language = None
     if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
-        language = find_language_or_create(request_json['object']['language']['identifier'], request_json['object']['language']['name'])
-        post.language_id = language.id
+        new_language = find_language_or_create(request_json['object']['language']['identifier'], request_json['object']['language']['name'])
+    elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
+        new_language = find_language(next(iter(request_json['object']['contentMap'])))
+    if new_language and (new_language.id != old_language_id):
+        post.language_id = new_language.id
+
+    # Tags
+    if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
+        db.session.execute(text('DELETE FROM "post_tag" WHERE post_id = :post_id'), {'post_id': post.id})
+        for json_tag in request_json['object']['tag']:
+            if json_tag['type'] == 'Hashtag':
+                if json_tag['name'][1:].lower() != post.community.name.lower():  # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
+                    hashtag = find_hashtag_or_create(json_tag['name'])
+                    if hashtag:
+                        post.tags.append(hashtag)
+
+    post.comments_enabled = request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True
+    post.edited_at = utcnow()
+
+    if request_json['object']['type'] == 'Video':
+        # return now for PeerTube, otherwise rest of this function breaks the post
+        # consider querying the Likes endpoint (that mostly seems to be what Updates are about)
+        return
 
     # Links
     old_url = post.url
-    old_image_id = post.image_id
-    post.url = ''
-    if request_json['object']['type'] == 'Video':
-        post.type = POST_TYPE_VIDEO
-        # PeerTube URL isn't going to change, so set to old_url to prevent this function changing type or icon
-        post.url = old_url
+    new_url = None
     if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
             'type' in request_json['object']['attachment'][0]:
-        alt_text = None
         if request_json['object']['attachment'][0]['type'] == 'Link':
-            post.url = request_json['object']['attachment'][0]['href']  # Lemmy < 0.19.4
+            new_url = request_json['object']['attachment'][0]['href']  # Lemmy < 0.19.4
         if request_json['object']['attachment'][0]['type'] == 'Document':
-            post.url = request_json['object']['attachment'][0]['url']  # Mastodon
-            if 'name' in request_json['object']['attachment'][0]:
-                alt_text = request_json['object']['attachment'][0]['name']
+            new_url = request_json['object']['attachment'][0]['url']  # Mastodon
         if request_json['object']['attachment'][0]['type'] == 'Image':
-            post.url = request_json['object']['attachment'][0]['url']  # PixelFed / PieFed / Lemmy >= 0.19.4
-            if 'name' in request_json['object']['attachment'][0]:
-                alt_text = request_json['object']['attachment'][0]['name']
-        if post.url == '':
-            post.type = POST_TYPE_ARTICLE
-        else:
-            post.url = remove_tracking_from_link(post.url)
-        if (post.url and post.url != old_url) or (post.url == '' and old_url != ''):
-            if post.image_id:
-                old_image = File.query.get(post.image_id)
-                post.image_id = None
-                old_image.delete_from_disk()
-                File.query.filter_by(id=old_image_id).delete()
-                post.image = None
-        if (post.url and post.url != old_url):
-            if is_image_url(post.url):
-                post.type = POST_TYPE_IMAGE
-                if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
-                    image = File(source_url=request_json['object']['image']['url'])
-                else:
-                    image = File(source_url=post.url)
-                if alt_text:
-                    image.alt_text = alt_text
-                db.session.add(image)
-                post.image = image
-            elif is_video_url(post.url):
-                post.type = POST_TYPE_VIDEO
-                image = File(source_url=post.url)
-                db.session.add(image)
-                post.image = image
-            else:
-                post.type = POST_TYPE_LINK
-            domain = domain_from_url(post.url)
-            # notify about links to banned websites.
-            already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
-            if domain.notify_mods:
-                for community_member in post.community.moderators():
-                    notify = Notification(title='Suspicious content', url=post.ap_id,
-                                          user_id=community_member.user_id,
-                                          author_id=1)
-                    db.session.add(notify)
-                    already_notified.add(community_member.user_id)
-            if domain.notify_admins:
-                for admin in Site.admins():
-                    if admin.id not in already_notified:
-                        notify = Notification(title='Suspicious content',
-                                              url=post.ap_id, user_id=admin.id,
-                                              author_id=1)
-                        db.session.add(notify)
-            if not domain.banned:
-                domain.post_count += 1
-                post.domain = domain
-            else:
-                post.url = old_url  # don't change if url changed from non-banned domain to banned domain
+            new_url = request_json['object']['attachment'][0]['url']  # PixelFed / PieFed / Lemmy >= 0.19.4
+    if new_url:
+        new_url = remove_tracking_from_link(new_url)
+        new_domain = domain_from_url(new_url)
+        if new_domain.banned:
+            db.session.commit()
+            return  # reject change to url if new domain is banned
+
+    old_db_entry_to_delete = None
+    if old_url != new_url:
+        if post.image:
+            post.image.delete_from_disk()
+            old_db_entry_to_delete = post.image_id
+        if new_url:
+            post.url = new_url
+            image = None
+            if is_image_url(new_url):
+                post.type = POST_TYPE_IMAGE
+                image = File(source_url=new_url)
+                if 'name' in request_json['object']['attachment'][0] and request_json['object']['attachment'][0]['name'] is not None:
+                    image.alt_text = request_json['object']['attachment'][0]['name']
+            elif is_video_url(new_url):
+                post.type = POST_TYPE_VIDEO
+                image = File(source_url=new_url)
+            else:
+                if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
+                    image = File(source_url=request_json['object']['image']['url'])
+                else:
+                    # Let's see if we can do better than the source instance did!
+                    tn_url = new_url
+                    if tn_url[:32] == 'https://www.youtube.com/watch?v=':
+                        tn_url = 'https://youtu.be/' + tn_url[32:43]  # better chance of thumbnail from youtu.be than youtube.com
+                    opengraph = opengraph_parse(tn_url)
+                    if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
+                        filename = opengraph.get('og:image') or opengraph.get('og:image:url')
+                        if not filename.startswith('/'):
+                            image = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
+                if is_video_hosting_site(new_url):
+                    post.type = POST_TYPE_VIDEO
+                else:
+                    post.type = POST_TYPE_LINK
+            if image:
+                db.session.add(image)
+                db.session.commit()
+                post.image = image
+                make_image_sizes(image.id, 170, 512, 'posts')  # the 512 sized image is for masonry view
+            else:
+                old_db_entry_to_delete = None
+
+            # url domain
+            old_domain = domain_from_url(old_url) if old_url else None
+            if old_domain != new_domain:
+                # notify about links to banned websites.
+                already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
+                if new_domain.notify_mods:
+                    for community_member in post.community.moderators():
+                        notify = Notification(title='Suspicious content', url=post.ap_id,
+                                              user_id=community_member.user_id,
+                                              author_id=1)
+                        db.session.add(notify)
+                        already_notified.add(community_member.user_id)
+                if new_domain.notify_admins:
+                    for admin in Site.admins():
+                        if admin.id not in already_notified:
+                            notify = Notification(title='Suspicious content',
+                                                  url=post.ap_id, user_id=admin.id,
+                                                  author_id=1)
+                            db.session.add(notify)
+                new_domain.post_count += 1
+                post.domain = new_domain
 
             # Fix-up cross posts (Posts which link to the same url as other posts)
             if post.cross_posts is not None:
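
A quick sanity check on the slice arithmetic in the new thumbnail fallback above (the video id below is an arbitrary example, not taken from the commit):

    url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
    assert url[:32] == 'https://www.youtube.com/watch?v='  # the watch?v= prefix is exactly 32 characters
    assert url[32:43] == 'dQw4w9WgXcQ'                     # positions 32-42 hold the 11-character video id
    print('https://youtu.be/' + url[32:43])                # prints https://youtu.be/dQw4w9WgXcQ
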
@@ -1869,7 +1911,7 @@ def update_post_from_activity(post: Post, request_json: dict):
                     if ocp.cross_posts is not None and post.id in ocp.cross_posts:
                         ocp.cross_posts.remove(post.id)
-                new_cross_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
+                new_cross_posts = Post.query.filter(Post.id != post.id, Post.url == new_url, Post.deleted == False,
                                                     Post.posted_at > utcnow() - timedelta(days=6)).all()
                 for ncp in new_cross_posts:
                     if ncp.cross_posts is None:
@@ -1881,33 +1923,19 @@ def update_post_from_activity(post: Post, request_json: dict):
                     else:
                         post.cross_posts.append(ncp.id)
-    if post is not None:
-        if 'image' in request_json['object'] and post.image is None:
-            image = File(source_url=request_json['object']['image']['url'])
-            db.session.add(image)
-            db.session.commit()
-            post.image_id = image.id
-            db.session.add(post)
-            db.session.commit()
-
-        if post.image_id and post.image_id != old_image_id:
-            make_image_sizes(post.image_id, 170, 512, 'posts')  # the 512 sized image is for masonry view
-    if 'sensitive' in request_json['object']:
-        post.nsfw = request_json['object']['sensitive']
-    if nsfl_in_title:
-        post.nsfl = True
-    elif 'nsfl' in request_json['object']:
-        post.nsfl = request_json['object']['nsfl']
-    if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
-        db.session.execute(text('DELETE FROM "post_tag" WHERE post_id = :post_id'), {'post_id': post.id})
-        for json_tag in request_json['object']['tag']:
-            if json_tag['type'] == 'Hashtag':
-                if json_tag['name'][1:].lower() != post.community.name.lower():  # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
-                    hashtag = find_hashtag_or_create(json_tag['name'])
-                    if hashtag:
-                        post.tags.append(hashtag)
-    post.comments_enabled = request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True
-    post.edited_at = utcnow()
+        else:
+            post.type = POST_TYPE_ARTICLE
+            post.url = ''
+            if post.cross_posts is not None:  # unlikely, but not impossible
+                old_cross_posts = Post.query.filter(Post.id.in_(post.cross_posts)).all()
+                post.cross_posts.clear()
+                for ocp in old_cross_posts:
+                    if ocp.cross_posts is not None and post.id in ocp.cross_posts:
+                        ocp.cross_posts.remove(post.id)
+
+    db.session.commit()
+    if old_db_entry_to_delete:
+        File.query.filter_by(id=old_db_entry_to_delete).delete()
     db.session.commit()
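
For reference, a minimal sketch of the kind of Update object the reworked function now consumes. The field names come from the diff above; the example values and the pre-loaded post are hypothetical:

    request_json = {
        'object': {
            'type': 'Page',                            # any non-'Video' type takes the full path; 'Video' returns early
            'name': 'Example title',                   # omit 'name' to exercise the [Microblog] auto-title path
            'content': '<p>Updated body</p>',
            'mediaType': 'text/html',
            'sensitive': False,                        # mapped to post.nsfw
            'nsfl': False,                             # mapped to post.nsfl
            'language': {'identifier': 'en', 'name': 'English'},
            'tag': [{'type': 'Hashtag', 'name': '#example'}],
            'attachment': [{'type': 'Link', 'href': 'https://example.com/article'}],
            'commentsEnabled': True,
        }
    }
    update_post_from_activity(post, request_json)      # 'post' is an existing Post, loaded inside the app context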