pyfedi/app/community/util.py

599 lines
27 KiB
Python
Raw Normal View History

from datetime import datetime, timedelta
from threading import Thread
from time import sleep
2023-10-10 22:25:37 +13:00
from typing import List
import requests
from PIL import Image, ImageOps
from flask import request, abort, g, current_app, json
from flask_login import current_user
from pillow_heif import register_heif_opener
from app import db, cache, celery
from app.activitypub.signature import post_request
2024-03-15 14:24:45 +13:00
from app.activitypub.util import find_actor_or_create, actor_json_to_model, post_json_to_model, default_context
2024-04-16 20:59:58 +12:00
from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE, POST_TYPE_VIDEO
from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \
Instance, Notification, User, ActivityPubLog
2024-01-25 20:16:08 +13:00
from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \
is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, \
2024-03-22 12:22:19 +13:00
remove_tracking_from_link, ap_datetime, instance_banned, blocked_phrases
from sqlalchemy import func, desc
2023-11-27 22:05:35 +13:00
import os
# Image file extensions (lower-case, with the leading dot) that the save/thumbnail helpers in this module accept
allowed_extensions = ['.gif', '.jpg', '.jpeg', '.png', '.webp', '.heic']
def search_for_community(address: str):
    """Resolve a community address of the form ``!name@server`` to a Community.

    A community already stored under that address is returned as-is. Otherwise
    the remote server is asked via WebFinger for the ActivityPub profile URL,
    the profile is fetched and, when it describes a ``Group``, converted with
    ``actor_json_to_model``. Moderators and old posts are then retrieved -
    inline when the app runs in debug mode, through the Celery task otherwise.

    :param address: community address; anything not starting with ``!`` is ignored.
    :return: the Community, or None when the address is malformed or no usable
        profile was found.
    :raises Exception: when the server is listed in BannedInstances.
    """
    if not address.startswith('!'):
        return None

    ap_id = address[1:]
    # exactly one '@' with text on both sides, otherwise there is nothing to look up
    parts = ap_id.split('@')
    if len(parts) != 2 or not all(parts):
        return None
    name, server = parts

    banned = BannedInstances.query.filter_by(domain=server).first()
    if banned:
        reason = f" Reason: {banned.reason}" if banned.reason is not None else ''
        raise Exception(f"{server} is blocked.{reason}")  # todo: create custom exception class hierarchy

    already_exists = Community.query.filter_by(ap_id=ap_id).first()
    if already_exists:
        return already_exists

    # Look up the profile address of the community using WebFinger
    # todo: try, except block around every get_request
    webfinger_data = get_request(f"https://{server}/.well-known/webfinger",
                                 params={'resource': f"acct:{ap_id}"})
    try:
        if webfinger_data.status_code != 200:
            return None
        webfinger_json = webfinger_data.json()
    finally:
        webfinger_data.close()

    for link in webfinger_json.get('links', []):
        # the 'self' link contains the URL of the activitypub profile
        if link.get('rel') != 'self' or 'href' not in link:
            continue
        accept = link.get('type', 'application/activity+json')

        # retrieve the activitypub profile
        # to see the structure of the json contained in community_data, do a GET to
        # https://lemmy.world/c/technology with header Accept: application/activity+json
        community_data = get_request(link['href'], headers={'Accept': accept})
        try:
            if community_data.status_code != 200:
                continue
            community_json = community_data.json()
        finally:
            community_data.close()

        if community_json.get('type') == 'Group':
            community = actor_json_to_model(community_json, name, server)
            if community:
                if current_app.debug:
                    retrieve_mods_and_backfill(community.id)
                else:
                    retrieve_mods_and_backfill.delay(community.id)
            return community
    return None
@celery.task
def retrieve_mods_and_backfill(community_id: int):
    """Fetch a remote community's moderators, recent posts and featured posts.

    Runs in three steps: (1) every actor in the moderators collection becomes a
    moderator CommunityMember, (2) up to 50 activities from the outbox are
    turned into posts, (3) posts listed in the featured collection are made
    sticky. Steps 2 and 3 are skipped for nsfw/nsfl communities when the site
    does not allow that content.

    :param community_id: primary key of the Community to process.
    """
    with current_app.app_context():
        community = Community.query.get(community_id)
        if community is None:
            # nothing to do if the community no longer exists
            return
        site = Site.query.get(1)
        json_headers = {'Accept': 'application/activity+json'}

        if community.ap_moderators_url:
            mods_request = get_request(community.ap_moderators_url, headers=json_headers)
            try:
                mods_data = mods_request.json() if mods_request.status_code == 200 else None
            finally:
                mods_request.close()
            if mods_data and mods_data.get('type') == 'OrderedCollection' and 'orderedItems' in mods_data:
                for actor in mods_data['orderedItems']:
                    sleep(0.5)
                    user = find_actor_or_create(actor)
                    if not user:
                        continue
                    existing_membership = CommunityMember.query.filter_by(community_id=community.id,
                                                                          user_id=user.id).first()
                    if existing_membership:
                        existing_membership.is_moderator = True
                    else:
                        db.session.add(CommunityMember(community_id=community.id, user_id=user.id,
                                                       is_moderator=True))
                db.session.commit()

        # only backfill nsfw if nsfw communities are allowed
        if (community.nsfw and not site.enable_nsfw) or (community.nsfl and not site.enable_nsfl):
            return

        # download 50 old posts. Guard on the URL that is actually requested.
        if community.ap_outbox_url:
            outbox_request = get_request(community.ap_outbox_url, headers=json_headers)
            try:
                outbox_data = outbox_request.json() if outbox_request.status_code == 200 else None
            finally:
                outbox_request.close()
            if isinstance(outbox_data, dict) and outbox_data.get('type') == 'OrderedCollection' \
                    and 'orderedItems' in outbox_data:
                activities_processed = 0
                for activity in outbox_data['orderedItems']:
                    user = find_actor_or_create(activity['object']['actor'])
                    # logged as a failure up front; post_json_to_model receives the log entry
                    activity_log = ActivityPubLog(direction='in', activity_id=activity['id'],
                                                  activity_type='Announce', result='failure')
                    if site.log_activitypub_json:
                        activity_log.activity_json = json.dumps(activity)
                    db.session.add(activity_log)
                    if user:
                        post = post_json_to_model(activity_log, activity['object']['object'], user, community)
                        if post:
                            post.ap_create_id = activity['object']['id']
                            post.ap_announce_id = activity['id']
                            post.ranking = post_ranking(post.score, post.posted_at)
                            if post.url:
                                # link this post with others sharing its URL, posted within 3 days either side
                                other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url,
                                                                Post.posted_at > post.posted_at - timedelta(days=3),
                                                                Post.posted_at < post.posted_at + timedelta(days=3)).all()
                                for op in other_posts:
                                    if op.cross_posts is None:
                                        op.cross_posts = [post.id]
                                    else:
                                        op.cross_posts.append(post.id)
                                    if post.cross_posts is None:
                                        post.cross_posts = [op.id]
                                    else:
                                        post.cross_posts.append(op.id)
                            db.session.commit()
                    else:
                        activity_log.exception_message = 'Could not find or create actor'
                        db.session.commit()

                    activities_processed += 1
                    if activities_processed >= 50:
                        break

                c = Community.query.get(community.id)
                if c.post_count > 0:
                    newest_post = Post.query.filter(Post.community_id == community_id) \
                        .order_by(desc(Post.posted_at)).first()
                    # post_count may be ahead of what is stored, so do not assume a row exists
                    if newest_post is not None:
                        c.last_active = newest_post.posted_at
                db.session.commit()

        if community.ap_featured_url:
            featured_request = get_request(community.ap_featured_url, headers=json_headers)
            try:
                featured_data = featured_request.json() if featured_request.status_code == 200 else None
            finally:
                featured_request.close()
            if isinstance(featured_data, dict) and featured_data.get('type') == 'OrderedCollection' \
                    and 'orderedItems' in featured_data:
                for item in featured_data['orderedItems']:
                    p = Post.query.filter(Post.ap_id == item['id']).first()
                    if p:
                        p.sticky = True
                        db.session.commit()
def community_url_exists(url) -> bool:
    """Tell whether a Community is stored whose ap_profile_id equals the lower-cased url."""
    wanted_profile_id = url.lower()
    match = Community.query.filter(Community.ap_profile_id == wanted_profile_id).first()
    return match is not None
def actor_to_community(actor) -> Community:
    """Find the non-banned Community an actor string refers to.

    A value containing '@' is matched against ap_id exactly; any other value is
    matched case-insensitively against the name of communities that have no
    ap_id. Returns None when nothing matches.
    """
    handle = actor.strip()
    if '@' not in handle:
        by_name = Community.query.filter(func.lower(Community.name) == func.lower(handle))
        return by_name.filter_by(banned=False, ap_id=None).first()
    return Community.query.filter_by(banned=False, ap_id=handle).first()
def opengraph_parse(url):
    """Run parse_page() on url with everything from the first '?' dropped.

    Any exception raised while parsing is swallowed and reported as None.
    """
    page_url, _, _ = url.partition('?')
    try:
        return parse_page(page_url)
    except Exception:
        return None
def url_to_thumbnail_file(filename) -> File:
    """Download an image and store it, shrunk to fit 150x150, under app/static/media/posts.

    :param filename: URL of the image; a query string, if any, is ignored when
        working out the file extension.
    :return: a new File describing the thumbnail (not added to the db session),
        or None when the download did not answer with HTTP 200.
    """
    filename_for_extension = filename.split('?')[0]
    file_extension = os.path.splitext(filename_for_extension)[1]

    response = requests.get(filename, timeout=5)
    try:
        if response.status_code != 200:
            return None
        image_bytes = response.content
    finally:
        # release the connection whatever the status code was
        response.close()

    new_filename = gibberish(15)
    directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(directory)
    final_place = os.path.join(directory, new_filename + file_extension)
    with open(final_place, 'wb') as f:
        f.write(image_bytes)

    Image.MAX_IMAGE_PIXELS = 89478485
    with Image.open(final_place) as downloaded:
        img = ImageOps.exif_transpose(downloaded)
        img.thumbnail((150, 150))
        img.save(final_place)  # the thumbnail overwrites the downloaded file
        thumbnail_width = img.width
        thumbnail_height = img.height

    return File(file_name=new_filename + file_extension, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place,
                source_url=filename)
def save_post(form, post: Post, type: str):
    """Copy a submitted post form onto a Post and prepare its attached image.

    Handles the 'discussion' (or empty), 'link', 'image' and 'video' form types;
    'poll' is accepted but nothing is done for it here. New posts (post.id is
    None) additionally get their initial score/ranking, are checked against the
    blocked phrases and are added to the db session. Nothing is committed here.

    :param form: the submitted form; which fields are read depends on ``type``.
    :param post: the Post being created or edited.
    :param type: which kind of form was submitted.
    :raises Exception: for an unknown ``type``.

    Calls abort(400) for an upload with a disallowed extension and abort(401)
    when a new post contains a blocked phrase.
    """
    post.indexable = current_user.indexable
    post.sticky = form.sticky.data
    post.nsfw = form.nsfw.data
    post.nsfl = form.nsfl.data
    post.notify_author = form.notify_author.data

    if type == '' or type == 'discussion':
        post.title = form.discussion_title.data
        post.body = form.discussion_body.data
        post.body_html = markdown_to_html(post.body)
        post.type = POST_TYPE_ARTICLE

    elif type == 'link':
        post.title = form.link_title.data
        post.body = form.link_body.data
        post.body_html = markdown_to_html(post.body)
        url_changed = post.id is None or form.link_url.data != post.url
        post.url = remove_tracking_from_link(form.link_url.data.strip())
        post.type = POST_TYPE_LINK
        domain = domain_from_url(form.link_url.data)
        domain.post_count += 1
        post.domain = domain

        if url_changed:
            if post.image_id:
                remove_old_file(post.image_id)
                post.image_id = None

            if post.url.endswith(('.mp4', '.webm')):
                file = File(source_url=form.link_url.data)  # make_image_sizes() will take care of turning this into a still image
                post.image = file
                db.session.add(file)
            else:
                file_extension = os.path.splitext(form.link_url.data)[1]
                if file_extension.lower() in allowed_extensions:
                    # this url is a link to an image - turn it into a image post
                    file = File(source_url=form.link_url.data)
                    post.image = file
                    db.session.add(file)
                    post.type = POST_TYPE_IMAGE
                else:
                    # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
                    opengraph = opengraph_parse(form.link_url.data)
                    filename = None
                    if opengraph:
                        filename = opengraph.get('og:image') or opengraph.get('og:image:url')
                    if filename:
                        filename_for_extension = filename.split('?')[0]
                        file_extension = os.path.splitext(filename_for_extension)[1]
                        # site-relative image paths (leading '/') are not fetched
                        if file_extension.lower() in allowed_extensions and not filename.startswith('/'):
                            file = url_to_thumbnail_file(filename)
                            if file:
                                file.alt_text = shorten_string(opengraph.get('og:title'), 295)
                                post.image = file
                                db.session.add(file)

    elif type == 'image':
        post.title = form.image_title.data
        post.body = form.image_body.data
        post.body_html = markdown_to_html(post.body)
        post.type = POST_TYPE_IMAGE
        alt_text = form.image_alt_text.data if form.image_alt_text.data else form.image_title.data
        uploaded_file = request.files['image_file']
        if uploaded_file and uploaded_file.filename != '':
            if post.image_id:
                remove_old_file(post.image_id)
                post.image_id = None

            # check if this is an allowed type of file
            file_ext = os.path.splitext(uploaded_file.filename)[1]
            if file_ext.lower() not in allowed_extensions:
                abort(400)
            new_filename = gibberish(15)

            # set up the storage directory
            directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
            ensure_directory_exists(directory)

            # save the file
            final_place = os.path.join(directory, new_filename + file_ext)
            final_place_medium = os.path.join(directory, new_filename + '_medium.webp')
            final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
            uploaded_file.seek(0)
            uploaded_file.save(final_place)

            if file_ext.lower() == '.heic':
                register_heif_opener()

            Image.MAX_IMAGE_PIXELS = 89478485

            # resize if necessary
            img = Image.open(final_place)
            if '.' + img.format.lower() in allowed_extensions:
                img = ImageOps.exif_transpose(img)

                # the stored original is capped at 2000x2000
                img.thumbnail((2000, 2000))
                img.save(final_place)

                # File.file_path below always points at the medium version, so always write it.
                # thumbnail() only ever shrinks, so images within 512x512 keep their size.
                img.thumbnail((512, 512))
                img.save(final_place_medium, format="WebP", quality=93)
                img_width = img.width
                img_height = img.height

                # save a second, smaller, version as a thumbnail
                img.thumbnail((150, 150))
                img.save(final_place_thumbnail, format="WebP", quality=93)
                thumbnail_width = img.width
                thumbnail_height = img.height

                file = File(file_path=final_place_medium, file_name=new_filename + file_ext, alt_text=alt_text,
                            width=img_width, height=img_height, thumbnail_width=thumbnail_width,
                            thumbnail_height=thumbnail_height, thumbnail_path=final_place_thumbnail,
                            source_url=final_place.replace('app/static/', f"https://{current_app.config['SERVER_NAME']}/static/"))
                post.image = file
                db.session.add(file)

    elif type == 'video':
        post.title = form.video_title.data
        post.body = form.video_body.data
        post.body_html = markdown_to_html(post.body)
        url_changed = post.id is None or form.video_url.data != post.url
        post.url = remove_tracking_from_link(form.video_url.data.strip())
        post.type = POST_TYPE_VIDEO
        domain = domain_from_url(form.video_url.data)
        domain.post_count += 1
        post.domain = domain

        if url_changed:
            if post.image_id:
                remove_old_file(post.image_id)
                post.image_id = None

            file = File(source_url=form.video_url.data)  # make_image_sizes() will take care of turning this into a still image
            post.image = file
            db.session.add(file)

    elif type == 'poll':
        ...
    else:
        raise Exception('invalid post type')

    if post.id is None:
        # authors with a strong reputation, good or bad, start with a non-neutral score
        if current_user.reputation > 100:
            post.up_votes = 1
            post.score = 1
        if current_user.reputation < -100:
            post.score = -1
        post.ranking = post_ranking(post.score, utcnow())

        # Filter by phrase. abort() raises, so nothing after it runs.
        for blocked_phrase in blocked_phrases():
            if blocked_phrase in post.title or (post.body and blocked_phrase in post.body):
                abort(401)

        db.session.add(post)

    g.site.last_active = utcnow()
def delete_post_from_community(post_id):
    """Delete a post: inline when the app is in debug mode, via the Celery queue otherwise."""
    if not current_app.debug:
        delete_post_from_community_task.delay(post_id)
        return
    delete_post_from_community_task(post_id)
@celery.task
def delete_post_from_community_task(post_id):
    """Delete a post locally and federate the deletion.

    For a remote community the Delete activity is sent to the community's
    inbox, signed as the current user. For a local community it is wrapped in
    an Announce and sent to every following instance that has an inbox and is
    neither blocked by the current user nor banned.

    :param post_id: primary key of the Post to delete; unknown ids are ignored.
    """
    # NOTE(review): relies on current_user being available where the task runs - confirm for Celery workers
    post = Post.query.get(post_id)
    if post is None:
        return
    community = post.community
    # keep what federation needs: the post cannot be read once its deletion is committed
    post_ap_id = post.ap_id
    post.delete_dependencies()
    post.flush_cache()
    db.session.delete(post)
    db.session.commit()

    if community.local_only:
        return

    delete_json = {
        'id': f"https://{current_app.config['SERVER_NAME']}/activities/delete/{gibberish(15)}",
        'type': 'Delete',
        'actor': current_user.profile_id(),
        'audience': community.profile_id(),
        'to': [community.profile_id(), 'https://www.w3.org/ns/activitystreams#Public'],
        'published': ap_datetime(utcnow()),
        'cc': [
            current_user.followers_url()
        ],
        'object': post_ap_id,
    }

    if not community.is_local():  # this is a remote community, send it to the instance that hosts it
        post_request(community.ap_inbox_url, delete_json, current_user.private_key,
                     current_user.ap_profile_id + '#main-key')
    else:  # local community - send it to followers on remote instances
        announce = {
            "id": f"https://{current_app.config['SERVER_NAME']}/activities/announce/{gibberish(15)}",
            "type": 'Announce',
            "to": [
                "https://www.w3.org/ns/activitystreams#Public"
            ],
            "actor": community.ap_profile_id,
            "cc": [
                community.ap_followers_url
            ],
            '@context': default_context(),
            'object': delete_json
        }
        for instance in community.following_instances():
            if instance.inbox and not current_user.has_blocked_instance(instance.id) and not instance_banned(
                    instance.domain):
                send_to_remote_instance(instance.id, community.id, announce)
def delete_post_reply_from_community(post_reply_id):
    """Delete a reply: inline when the app is in debug mode, via the Celery queue otherwise."""
    if not current_app.debug:
        delete_post_reply_from_community_task.delay(post_reply_id)
        return
    delete_post_reply_from_community_task(post_reply_id)
@celery.task
def delete_post_reply_from_community_task(post_reply_id):
    """Delete (or blank out) a reply and federate the deletion.

    Only acts when the current user wrote the reply or community.is_moderator()
    is true. A reply that has replies of its own keeps its row and gets a
    'Deleted by ...' body instead; otherwise the row is removed. Federation
    works as for posts: straight to the hosting instance for a remote
    community, wrapped in an Announce to following instances for a local one.

    :param post_reply_id: primary key of the PostReply; unknown ids are ignored.
    """
    # NOTE(review): relies on current_user being available where the task runs - confirm for Celery workers
    post_reply = PostReply.query.get(post_reply_id)
    if post_reply is None:
        return
    post = post_reply.post
    community = post.community
    if not (post_reply.user_id == current_user.id or community.is_moderator()):
        return

    # keep what federation needs: a removed reply cannot be read once the deletion is committed
    reply_ap_id = post_reply.ap_id
    if post_reply.has_replies():
        post_reply.body = 'Deleted by author' if post_reply.author.id == current_user.id else 'Deleted by moderator'
        post_reply.body_html = markdown_to_html(post_reply.body)
    else:
        post_reply.delete_dependencies()
        db.session.delete(post_reply)
    db.session.commit()
    post.flush_cache()

    # federate delete
    if community.local_only:
        return

    delete_json = {
        'id': f"https://{current_app.config['SERVER_NAME']}/activities/delete/{gibberish(15)}",
        'type': 'Delete',
        'actor': current_user.profile_id(),
        'audience': community.profile_id(),
        'to': [community.profile_id(), 'https://www.w3.org/ns/activitystreams#Public'],
        'published': ap_datetime(utcnow()),
        'cc': [
            current_user.followers_url()
        ],
        'object': reply_ap_id,
    }

    if not community.is_local():  # this is a remote community, send it to the instance that hosts it
        post_request(community.ap_inbox_url, delete_json, current_user.private_key,
                     current_user.ap_profile_id + '#main-key')
    else:  # local community - send it to followers on remote instances
        announce = {
            "id": f"https://{current_app.config['SERVER_NAME']}/activities/announce/{gibberish(15)}",
            "type": 'Announce',
            "to": [
                "https://www.w3.org/ns/activitystreams#Public"
            ],
            "actor": community.ap_profile_id,
            "cc": [
                community.ap_followers_url
            ],
            '@context': default_context(),
            'object': delete_json
        }
        for instance in community.following_instances():
            if instance.inbox and not current_user.has_blocked_instance(instance.id) and not instance_banned(
                    instance.domain):
                send_to_remote_instance(instance.id, community.id, announce)
def remove_old_file(file_id):
    """Call delete_from_disk() on the File with this id; an id with no File row is ignored."""
    remove_file = File.query.get(file_id)
    if remove_file is not None:
        remove_file.delete_from_disk()
def save_icon_file(icon_file, directory='communities') -> File:
    """Store an uploaded icon (at most 250x250) plus a 40x40 WebP thumbnail.

    :param icon_file: the uploaded file; its filename decides the extension.
    :param directory: sub-folder of app/static/media to store into; also used
        as the first word of the alt text.
    :return: the new File, already added to the db session.

    Calls abort(400) when the extension or the detected image format is not in
    allowed_extensions.
    """
    # check if this is an allowed type of file
    file_ext = os.path.splitext(icon_file.filename)[1]
    if file_ext.lower() not in allowed_extensions:
        abort(400)
    new_filename = gibberish(15)

    # set up the storage directory. Kept in its own variable so `directory` still holds
    # the caller's value when the alt text is built.
    storage_dir = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(storage_dir)

    # save the file
    final_place = os.path.join(storage_dir, new_filename + file_ext)
    final_place_thumbnail = os.path.join(storage_dir, new_filename + '_thumbnail.webp')
    icon_file.save(final_place)

    if file_ext.lower() == '.heic':
        register_heif_opener()

    # resize if necessary
    Image.MAX_IMAGE_PIXELS = 89478485
    img = Image.open(final_place)
    if '.' + img.format.lower() not in allowed_extensions:
        # the content is not an accepted image type: do not leave the upload on disk
        img.close()
        os.remove(final_place)
        abort(400)

    img = ImageOps.exif_transpose(img)
    img_width = img.width
    img_height = img.height
    if img.width > 250 or img.height > 250:
        img.thumbnail((250, 250))
        img.save(final_place)
        img_width = img.width
        img_height = img.height

    # save a second, smaller, version as a thumbnail
    img.thumbnail((40, 40))
    img.save(final_place_thumbnail, format="WebP", quality=93)
    thumbnail_width = img.width
    thumbnail_height = img.height

    file = File(file_path=final_place, file_name=new_filename + file_ext, alt_text=f'{directory} icon',
                width=img_width, height=img_height, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place_thumbnail)
    db.session.add(file)
    return file
def save_banner_file(banner_file, directory='communities') -> File:
    """Store an uploaded banner (at most 1600x600) plus an 878x500 WebP thumbnail.

    :param banner_file: the uploaded file; its filename decides the extension.
    :param directory: sub-folder of app/static/media to store into; also used
        as the first word of the alt text.
    :return: the new File, already added to the db session.

    Calls abort(400) when the extension or the detected image format is not in
    allowed_extensions.
    """
    # check if this is an allowed type of file
    file_ext = os.path.splitext(banner_file.filename)[1]
    if file_ext.lower() not in allowed_extensions:
        abort(400)
    new_filename = gibberish(15)

    # set up the storage directory. Kept in its own variable so `directory` still holds
    # the caller's value when the alt text is built.
    storage_dir = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(storage_dir)

    # save the file
    final_place = os.path.join(storage_dir, new_filename + file_ext)
    final_place_thumbnail = os.path.join(storage_dir, new_filename + '_thumbnail.webp')
    banner_file.save(final_place)

    if file_ext.lower() == '.heic':
        register_heif_opener()

    # resize if necessary
    Image.MAX_IMAGE_PIXELS = 89478485
    img = Image.open(final_place)
    if '.' + img.format.lower() not in allowed_extensions:
        # the content is not an accepted image type: do not leave the upload on disk
        img.close()
        os.remove(final_place)
        abort(400)

    img = ImageOps.exif_transpose(img)
    img_width = img.width
    img_height = img.height
    if img.width > 1600 or img.height > 600:
        img.thumbnail((1600, 600))
        img.save(final_place)
        img_width = img.width
        img_height = img.height

    # save a second, smaller, version as a thumbnail
    img.thumbnail((878, 500))
    img.save(final_place_thumbnail, format="WebP", quality=93)
    thumbnail_width = img.width
    thumbnail_height = img.height

    file = File(file_path=final_place, file_name=new_filename + file_ext, alt_text=f'{directory} banner',
                width=img_width, height=img_height, thumbnail_path=final_place_thumbnail,
                thumbnail_width=thumbnail_width, thumbnail_height=thumbnail_height)
    db.session.add(file)
    return file
# NB this always signs POSTs as the community so is only suitable for Announce activities
def send_to_remote_instance(instance_id: int, community_id: int, payload):
    """Deliver payload to an instance: inline when the app is in debug mode, via the Celery queue otherwise."""
    if not current_app.debug:
        send_to_remote_instance_task.delay(instance_id, community_id, payload)
        return
    send_to_remote_instance_task(instance_id, community_id, payload)
@celery.task
def send_to_remote_instance_task(instance_id: int, community_id: int, payload):
    """POST payload to an instance's inbox, signed with the community's key, and record the outcome.

    Success resets the instance's failure count. A failure increments it,
    schedules the next attempt failures ** 4 seconds from now and marks the
    instance dormant once there are more than 2 failures.

    :param instance_id: primary key of the receiving Instance.
    :param community_id: primary key of the Community that signs the request.
    :param payload: the activity to send.
    """
    community = Community.query.get(community_id)
    if not community:
        return
    instance = Instance.query.get(instance_id)
    if instance is None:
        # nothing to deliver to, and no row to record the outcome on
        return

    if post_request(instance.inbox, payload, community.private_key, community.ap_profile_id + '#main-key'):
        instance.last_successful_send = utcnow()
        instance.failures = 0
    else:
        instance.failures += 1
        instance.most_recent_attempt = utcnow()
        instance.start_trying_again = utcnow() + timedelta(seconds=instance.failures ** 4)
        if instance.failures > 2:
            instance.dormant = True
    db.session.commit()