rework update_post_from_activity #351

freamon 2024-11-27 15:29:22 +00:00
parent e28550427b
commit 01e28a20b5

@@ -32,7 +32,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar
     microblog_content_to_title, generate_image_from_video_url, is_video_url, \
     notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
     html_to_text, add_to_modlog_activitypub, joined_communities, \
-    moderating_communities, get_task_session
+    moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse
 from sqlalchemy import or_
@@ -1753,13 +1753,7 @@ def update_post_reply_from_activity(reply: PostReply, request_json: dict):
 def update_post_from_activity(post: Post, request_json: dict):
-    if 'name' not in request_json['object']:  # Microblog posts
-        name = "[Microblog]"
-    else:
-        name = request_json['object']['name']
-    nsfl_in_title = '[NSFL]' in name.upper() or '(NSFL)' in name.upper()
-    post.title = name
+    # redo body without checking if it's changed
     if 'content' in request_json['object'] and request_json['object']['content'] is not None:
         if 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/html':
             post.body_html = allowlist_html(request_json['object']['content'])
@@ -1776,90 +1770,138 @@ def update_post_from_activity(post: Post, request_json: dict):
                 request_json['object']['content'] = '<p>' + request_json['object']['content'] + '</p>'
             post.body_html = allowlist_html(request_json['object']['content'])
         post.body = html_to_text(post.body_html)
-        if name == "[Microblog]":
-            autogenerated_title = microblog_content_to_title(post.body_html)
-            if len(autogenerated_title) < 20:
-                name += ' ' + autogenerated_title
-            else:
-                name = autogenerated_title
-            nsfl_in_title = '[NSFL]' in name.upper() or '(NSFL)' in name.upper()
-            post.title = name
+
+    # title
+    old_title = post.title
+    if 'name' in request_json['object']:
+        new_title = request_json['object']['name']
+        post.microblog = False
+    else:
+        autogenerated_title = microblog_content_to_title(post.body_html)
+        if len(autogenerated_title) < 20:
+            new_title = '[Microblog] ' + autogenerated_title.strip()
+        else:
+            new_title = autogenerated_title.strip()
+        post.microblog = True
+
+    if old_title != new_title:
+        post.title = new_title
+        if '[NSFL]' in new_title.upper() or '(NSFL)' in new_title.upper():
+            post.nsfl = True
+        if '[NSFW]' in new_title.upper() or '(NSFW)' in new_title.upper():
+            post.nsfw = True
+    if 'sensitive' in request_json['object']:
+        post.nsfw = request_json['object']['sensitive']
+    if 'nsfl' in request_json['object']:
+        post.nsfl = request_json['object']['nsfl']
 
     # Language
+    old_language_id = post.language_id
+    new_language = None
     if 'language' in request_json['object'] and isinstance(request_json['object']['language'], dict):
-        language = find_language_or_create(request_json['object']['language']['identifier'], request_json['object']['language']['name'])
-        post.language_id = language.id
+        new_language = find_language_or_create(request_json['object']['language']['identifier'], request_json['object']['language']['name'])
+    elif 'contentMap' in request_json['object'] and isinstance(request_json['object']['contentMap'], dict):
+        new_language = find_language(next(iter(request_json['object']['contentMap'])))
+    if new_language and (new_language.id != old_language_id):
+        post.language_id = new_language.id
+
+    # Tags
+    if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
+        db.session.execute(text('DELETE FROM "post_tag" WHERE post_id = :post_id'), {'post_id': post.id})
+        for json_tag in request_json['object']['tag']:
+            if json_tag['type'] == 'Hashtag':
+                if json_tag['name'][1:].lower() != post.community.name.lower():  # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
+                    hashtag = find_hashtag_or_create(json_tag['name'])
+                    if hashtag:
+                        post.tags.append(hashtag)
+
+    post.comments_enabled = request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True
+    post.edited_at = utcnow()
+
+    if request_json['object']['type'] == 'Video':
+        # return now for PeerTube, otherwise rest of this function breaks the post
+        # consider querying the Likes endpoint (that mostly seems to be what Updates are about)
+        return
 
     # Links
     old_url = post.url
-    old_image_id = post.image_id
-    post.url = ''
-    if request_json['object']['type'] == 'Video':
-        post.type = POST_TYPE_VIDEO
-        # PeerTube URL isn't going to change, so set to old_url to prevent this function changing type or icon
-        post.url = old_url
+    new_url = None
     if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
             'type' in request_json['object']['attachment'][0]:
-        alt_text = None
         if request_json['object']['attachment'][0]['type'] == 'Link':
-            post.url = request_json['object']['attachment'][0]['href']  # Lemmy < 0.19.4
+            new_url = request_json['object']['attachment'][0]['href']  # Lemmy < 0.19.4
         if request_json['object']['attachment'][0]['type'] == 'Document':
-            post.url = request_json['object']['attachment'][0]['url']  # Mastodon
-            if 'name' in request_json['object']['attachment'][0]:
-                alt_text = request_json['object']['attachment'][0]['name']
+            new_url = request_json['object']['attachment'][0]['url']  # Mastodon
         if request_json['object']['attachment'][0]['type'] == 'Image':
-            post.url = request_json['object']['attachment'][0]['url']  # PixelFed / PieFed / Lemmy >= 0.19.4
-            if 'name' in request_json['object']['attachment'][0]:
-                alt_text = request_json['object']['attachment'][0]['name']
-        if post.url == '':
-            post.type = POST_TYPE_ARTICLE
-        else:
-            post.url = remove_tracking_from_link(post.url)
-        if (post.url and post.url != old_url) or (post.url == '' and old_url != ''):
-            if post.image_id:
-                old_image = File.query.get(post.image_id)
-                post.image_id = None
-                old_image.delete_from_disk()
-                File.query.filter_by(id=old_image_id).delete()
-                post.image = None
-        if (post.url and post.url != old_url):
-            if is_image_url(post.url):
-                post.type = POST_TYPE_IMAGE
-                if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
-                    image = File(source_url=request_json['object']['image']['url'])
-                else:
-                    image = File(source_url=post.url)
-                if alt_text:
-                    image.alt_text = alt_text
-                db.session.add(image)
-                post.image = image
-            elif is_video_url(post.url):
-                post.type = POST_TYPE_VIDEO
-                image = File(source_url=post.url)
-                db.session.add(image)
-                post.image = image
-            else:
-                post.type = POST_TYPE_LINK
-            domain = domain_from_url(post.url)
-            # notify about links to banned websites.
-            already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
-            if domain.notify_mods:
-                for community_member in post.community.moderators():
-                    notify = Notification(title='Suspicious content', url=post.ap_id,
-                                          user_id=community_member.user_id,
-                                          author_id=1)
-                    db.session.add(notify)
-                    already_notified.add(community_member.user_id)
-            if domain.notify_admins:
-                for admin in Site.admins():
-                    if admin.id not in already_notified:
-                        notify = Notification(title='Suspicious content',
-                                              url=post.ap_id, user_id=admin.id,
-                                              author_id=1)
-                        db.session.add(notify)
-            if not domain.banned:
-                domain.post_count += 1
-                post.domain = domain
-            else:
-                post.url = old_url  # don't change if url changed from non-banned domain to banned domain
+            new_url = request_json['object']['attachment'][0]['url']  # PixelFed / PieFed / Lemmy >= 0.19.4
+    if new_url:
+        new_url = remove_tracking_from_link(new_url)
+        new_domain = domain_from_url(new_url)
+        if new_domain.banned:
+            db.session.commit()
+            return  # reject change to url if new domain is banned
+
+    old_db_entry_to_delete = None
+    if old_url != new_url:
+        if post.image:
+            post.image.delete_from_disk()
+            old_db_entry_to_delete = post.image_id
+        if new_url:
+            post.url = new_url
+            image = None
+            if is_image_url(new_url):
+                post.type = POST_TYPE_IMAGE
+                image = File(source_url=new_url)
+                if 'name' in request_json['object']['attachment'][0] and request_json['object']['attachment'][0]['name'] is not None:
+                    image.alt_text = request_json['object']['attachment'][0]['name']
+            elif is_video_url(new_url):
+                post.type = POST_TYPE_VIDEO
+                image = File(source_url=new_url)
+            else:
+                if 'image' in request_json['object'] and 'url' in request_json['object']['image']:
+                    image = File(source_url=request_json['object']['image']['url'])
+                else:
+                    # Let's see if we can do better than the source instance did!
+                    tn_url = new_url
+                    if tn_url[:32] == 'https://www.youtube.com/watch?v=':
+                        tn_url = 'https://youtu.be/' + tn_url[32:43]  # better chance of thumbnail from youtu.be than youtube.com
+                    opengraph = opengraph_parse(tn_url)
+                    if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
+                        filename = opengraph.get('og:image') or opengraph.get('og:image:url')
+                        if not filename.startswith('/'):
+                            image = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
+                if is_video_hosting_site(new_url):
+                    post.type = POST_TYPE_VIDEO
+                else:
+                    post.type = POST_TYPE_LINK
+            if image:
+                db.session.add(image)
+                db.session.commit()
+                post.image = image
+                make_image_sizes(image.id, 170, 512, 'posts')  # the 512 sized image is for masonry view
+            else:
+                old_db_entry_to_delete = None
+
+            # url domain
+            old_domain = domain_from_url(old_url) if old_url else None
+            if old_domain != new_domain:
+                # notify about links to banned websites.
+                already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
+                if new_domain.notify_mods:
+                    for community_member in post.community.moderators():
+                        notify = Notification(title='Suspicious content', url=post.ap_id,
+                                              user_id=community_member.user_id,
+                                              author_id=1)
+                        db.session.add(notify)
+                        already_notified.add(community_member.user_id)
+                if new_domain.notify_admins:
+                    for admin in Site.admins():
+                        if admin.id not in already_notified:
+                            notify = Notification(title='Suspicious content',
+                                                  url=post.ap_id, user_id=admin.id,
+                                                  author_id=1)
+                            db.session.add(notify)
+                new_domain.post_count += 1
+                post.domain = new_domain
 
             # Fix-up cross posts (Posts which link to the same url as other posts)
             if post.cross_posts is not None:
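
A quick sanity check on the slice arithmetic in the new thumbnail fallback above (the video id below is an arbitrary example, not taken from the commit):

    url = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
    assert url[:32] == 'https://www.youtube.com/watch?v='  # the watch?v= prefix is exactly 32 characters
    assert url[32:43] == 'dQw4w9WgXcQ'                     # positions 32-42 hold the 11-character video id
    print('https://youtu.be/' + url[32:43])                # prints https://youtu.be/dQw4w9WgXcQ
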
@@ -1869,7 +1911,7 @@ def update_post_from_activity(post: Post, request_json: dict):
                     if ocp.cross_posts is not None and post.id in ocp.cross_posts:
                         ocp.cross_posts.remove(post.id)
-                new_cross_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, Post.deleted == False,
+                new_cross_posts = Post.query.filter(Post.id != post.id, Post.url == new_url, Post.deleted == False,
                                                     Post.posted_at > utcnow() - timedelta(days=6)).all()
                 for ncp in new_cross_posts:
                     if ncp.cross_posts is None:
@@ -1881,33 +1923,19 @@ def update_post_from_activity(post: Post, request_json: dict):
                     else:
                         post.cross_posts.append(ncp.id)
-    if post is not None:
-        if 'image' in request_json['object'] and post.image is None:
-            image = File(source_url=request_json['object']['image']['url'])
-            db.session.add(image)
-            db.session.commit()
-            post.image_id = image.id
-            db.session.add(post)
-            db.session.commit()
-
-        if post.image_id and post.image_id != old_image_id:
-            make_image_sizes(post.image_id, 170, 512, 'posts')  # the 512 sized image is for masonry view
-    if 'sensitive' in request_json['object']:
-        post.nsfw = request_json['object']['sensitive']
-    if nsfl_in_title:
-        post.nsfl = True
-    elif 'nsfl' in request_json['object']:
-        post.nsfl = request_json['object']['nsfl']
-    if 'tag' in request_json['object'] and isinstance(request_json['object']['tag'], list):
-        db.session.execute(text('DELETE FROM "post_tag" WHERE post_id = :post_id'), {'post_id': post.id})
-        for json_tag in request_json['object']['tag']:
-            if json_tag['type'] == 'Hashtag':
-                if json_tag['name'][1:].lower() != post.community.name.lower():  # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
-                    hashtag = find_hashtag_or_create(json_tag['name'])
-                    if hashtag:
-                        post.tags.append(hashtag)
-    post.comments_enabled = request_json['object']['commentsEnabled'] if 'commentsEnabled' in request_json['object'] else True
-    post.edited_at = utcnow()
+        else:
+            post.type = POST_TYPE_ARTICLE
+            post.url = ''
+            if post.cross_posts is not None:  # unlikely, but not impossible
+                old_cross_posts = Post.query.filter(Post.id.in_(post.cross_posts)).all()
+                post.cross_posts.clear()
+                for ocp in old_cross_posts:
+                    if ocp.cross_posts is not None and post.id in ocp.cross_posts:
+                        ocp.cross_posts.remove(post.id)
+
+    db.session.commit()
+    if old_db_entry_to_delete:
+        File.query.filter_by(id=old_db_entry_to_delete).delete()
     db.session.commit()
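
For reference, a minimal sketch of the kind of Update object the reworked function now consumes. The field names come from the diff above; the example values and the pre-loaded post are hypothetical:

    request_json = {
        'object': {
            'type': 'Page',                            # any non-'Video' type takes the full path; 'Video' returns early
            'name': 'Example title',                   # omit 'name' to exercise the [Microblog] auto-title path
            'content': '<p>Updated body</p>',
            'mediaType': 'text/html',
            'sensitive': False,                        # mapped to post.nsfw
            'nsfl': False,                             # mapped to post.nsfl
            'language': {'identifier': 'en', 'name': 'English'},
            'tag': [{'type': 'Hashtag', 'name': '#example'}],
            'attachment': [{'type': 'Link', 'href': 'https://example.com/article'}],
            'commentsEnabled': True,
        }
    }
    update_post_from_activity(post, request_json)      # 'post' is an existing Post, loaded inside the app context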