2024-01-03 16:29:58 +13:00
from datetime import datetime , timedelta
2023-12-21 22:14:43 +13:00
from threading import Thread
from time import sleep
2023-10-10 22:25:37 +13:00
from typing import List
2023-11-29 20:32:07 +13:00
import requests
from PIL import Image , ImageOps
2023-12-21 22:14:43 +13:00
from flask import request , abort , g , current_app
2023-11-30 20:57:51 +13:00
from flask_login import current_user
from pillow_heif import register_heif_opener
2023-11-29 20:32:07 +13:00
2023-12-25 21:44:10 +13:00
from app import db , cache , celery
2023-12-26 21:39:52 +13:00
from app . activitypub . signature import post_request
2023-12-21 22:14:43 +13:00
from app . activitypub . util import find_actor_or_create , actor_json_to_model , post_json_to_model
2023-11-30 20:57:51 +13:00
from app . constants import POST_TYPE_ARTICLE , POST_TYPE_LINK , POST_TYPE_IMAGE
2024-01-03 16:29:58 +13:00
from app . models import Community , File , BannedInstances , PostReply , PostVote , Post , utcnow , CommunityMember , Site , \
2024-01-07 12:47:06 +13:00
Instance , Notification , User
2023-12-21 22:14:43 +13:00
from app . utils import get_request , gibberish , markdown_to_html , domain_from_url , validate_image , allowlist_html , \
2024-01-07 12:47:06 +13:00
html_to_markdown , is_image_url , ensure_directory_exists , inbox_domain , post_ranking , shorten_string
2023-11-09 22:44:09 +13:00
from sqlalchemy import desc , text
2023-11-27 22:05:35 +13:00
import os
2023-11-29 20:32:07 +13:00
from opengraph_parse import parse_page
2023-08-29 22:01:06 +12:00
2023-12-08 17:13:38 +13:00
# Lower-case file extensions (with leading dot) that uploaded images may have.
allowed_extensions = [
    '.gif',
    '.jpg',
    '.jpeg',
    '.png',
    '.webp',
    '.heic',
]
2023-12-21 22:14:43 +13:00
2023-08-29 22:01:06 +12:00
def search_for_community(address: str):
    """Find a community from a ``!name@server`` address, fetching it remotely if unknown.

    The local database is checked first (``Community.ap_id == name@server``).
    Otherwise the server's WebFinger endpoint is queried, the ``rel == 'self'``
    link is followed, and if the returned document has ``type == 'Group'`` it
    is turned into a model with ``actor_json_to_model``. For a newly created
    community ``retrieve_mods_and_backfill`` is run (inline when the app is in
    debug mode, otherwise queued with ``.delay``).

    :param address: community address; anything not starting with ``!`` yields None.
    :return: the Community, or None when it could not be found.
    :raises Exception: when ``server`` is listed in BannedInstances.
    :raises ValueError: when the part after ``!`` does not split into exactly
        ``name`` and ``server`` on ``@``.
    """
    if not address.startswith('!'):
        return None

    handle = address[1:]
    name, server = handle.split('@')

    banned = BannedInstances.query.filter_by(domain=server).first()
    if banned:
        reason = f" Reason: {banned.reason}" if banned.reason is not None else ''
        raise Exception(f"{server} is blocked.{reason}")  # todo: create custom exception class hierarchy

    already_exists = Community.query.filter_by(ap_id=handle).first()
    if already_exists:
        return already_exists

    # Look up the profile address of the community using WebFinger
    # todo: try, except block around every get_request
    webfinger_data = get_request(f"https://{server}/.well-known/webfinger",
                                 params={'resource': f"acct:{handle}"})
    try:
        if webfinger_data.status_code != 200:
            return None
        webfinger_json = webfinger_data.json()
    finally:
        # release the connection whatever the outcome
        webfinger_data.close()

    # a WebFinger document without 'links' is treated the same as "not found"
    for link in webfinger_json.get('links', []):
        if link.get('rel') != 'self':  # only the 'self' link contains the URL of the activitypub profile
            continue
        accept = link.get('type', 'application/activity+json')
        # retrieve the activitypub profile
        community_data = get_request(link['href'], headers={'Accept': accept})
        # to see the structure of the json contained in community_data, do a GET to
        # https://lemmy.world/c/technology with header Accept: application/activity+json
        try:
            if community_data.status_code != 200:
                continue
            community_json = community_data.json()
        finally:
            community_data.close()

        if community_json['type'] == 'Group':
            community = actor_json_to_model(community_json, name, server)
            if community:
                if current_app.debug:
                    retrieve_mods_and_backfill(community.id)
                else:
                    retrieve_mods_and_backfill.delay(community.id)
            return community
    return None
2023-09-05 20:25:02 +12:00
2023-12-25 21:44:10 +13:00
@celery.task
def retrieve_mods_and_backfill(community_id: int):
    """Fetch a remote community's moderators and up to 50 of its outbox posts.

    Step 1: if the community has ``ap_moderators_url``, every actor in the
    returned OrderedCollection is resolved with ``find_actor_or_create`` and
    recorded as a moderator CommunityMember.
    Step 2: unless the community is nsfw/nsfl and the site has that disabled,
    the ``<ap_public_url>/outbox`` collection is read and its activities are
    converted to posts with ``post_json_to_model``; afterwards the community's
    ``post_count`` and the ``last_active`` stamps are updated.

    :param community_id: primary key of the Community to process. Unknown ids
        are ignored.
    """
    with current_app.app_context():
        community = Community.query.get(community_id)
        if community is None:
            # no such community - nothing to retrieve
            return
        site = Site.query.get(1)

        if community.ap_moderators_url:
            mods_request = get_request(community.ap_moderators_url, headers={'Accept': 'application/activity+json'})
            mods_data = None
            try:
                if mods_request.status_code == 200:
                    mods_data = mods_request.json()
            finally:
                mods_request.close()
            if mods_data and mods_data['type'] == 'OrderedCollection' and 'orderedItems' in mods_data:
                for actor in mods_data['orderedItems']:
                    sleep(0.5)  # pause between actor lookups
                    user = find_actor_or_create(actor)
                    if user:
                        existing_membership = CommunityMember.query.filter_by(community_id=community.id,
                                                                              user_id=user.id).first()
                        if existing_membership:
                            existing_membership.is_moderator = True
                        else:
                            new_membership = CommunityMember(community_id=community.id, user_id=user.id,
                                                             is_moderator=True)
                            db.session.add(new_membership)
                db.session.commit()

        # only backfill nsfw if nsfw communities are allowed
        if (community.nsfw and not site.enable_nsfw) or (community.nsfl and not site.enable_nsfl):
            return

        # download 50 old posts
        if community.ap_public_url:
            outbox_request = get_request(community.ap_public_url + '/outbox',
                                         headers={'Accept': 'application/activity+json'})
            outbox_data = None
            try:
                if outbox_request.status_code == 200:
                    outbox_data = outbox_request.json()
            finally:
                outbox_request.close()
            # same empty-body guard as used for the moderators collection
            if outbox_data and outbox_data['type'] == 'OrderedCollection' and 'orderedItems' in outbox_data:
                activities_processed = 0
                for activity in outbox_data['orderedItems']:
                    user = find_actor_or_create(activity['object']['actor'])
                    if user:
                        # NOTE(review): assumes post_json_to_model always returns a post - confirm
                        post = post_json_to_model(activity['object']['object'], user, community)
                        post.ap_create_id = activity['object']['id']
                        post.ap_announce_id = activity['id']
                        post.ranking = post_ranking(post.score, post.posted_at)
                        db.session.commit()

                    # counted even when the author could not be resolved
                    activities_processed += 1
                    if activities_processed >= 50:
                        break
                c = Community.query.get(community.id)
                c.post_count = activities_processed
                c.last_active = site.last_active = utcnow()
                db.session.commit()
2023-09-05 20:25:02 +12:00
def community_url_exists(url) -> bool:
    """Return True when a Community with ``ap_profile_id`` equal to ``url`` is stored."""
    match = Community.query.filter_by(ap_profile_id=url).first()
    if match is None:
        return False
    return True
2023-09-17 21:19:51 +12:00
def actor_to_community(actor) -> Community:
    """Look up a non-banned community by actor string.

    Surrounding whitespace is stripped. An actor containing ``@`` is matched
    against ``ap_id``; otherwise it is matched against ``name`` among rows
    whose ``ap_id`` is None. Returns the first match or None.
    """
    handle = actor.strip()
    if '@' in handle:
        criteria = {'banned': False, 'ap_id': handle}
    else:
        criteria = {'name': handle, 'banned': False, 'ap_id': None}
    return Community.query.filter_by(**criteria).first()
2023-10-10 22:25:37 +13:00
2023-11-29 20:32:07 +13:00
@cache.memoize(timeout=50)
def opengraph_parse(url):
    """Return ``parse_page`` output for ``url`` with its query string removed.

    Any exception raised while parsing results in None (best effort).
    The result is memoized by the cache decorator with a timeout of 50.
    """
    if '?' in url:
        # keep only the part in front of the first '?'
        url = url.partition('?')[0]
    try:
        parsed = parse_page(url)
    except Exception:
        return None
    return parsed
def url_to_thumbnail_file(filename) -> File:
    """Download the image at URL ``filename`` and replace it on disk with a thumbnail.

    The download is written to ``app/static/media/posts/xx/yy/<random><ext>``
    (``ext`` is taken from the URL), then re-saved in place after EXIF
    transposition and shrinking to fit within 150x150.

    :param filename: URL of the image (despite the name, not a local path).
    :return: a new File describing the thumbnail (not added to the session),
        or None when the server did not answer with status 200.
    """
    file_extension = os.path.splitext(filename)[1]
    response = requests.get(filename, timeout=5)
    try:
        if response.status_code != 200:
            return None
        image_bytes = response.content
    finally:
        # close on every path, not only after a successful download
        response.close()

    new_filename = gibberish(15)
    directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(directory)
    final_place = os.path.join(directory, new_filename + file_extension)
    with open(final_place, 'wb') as f:
        f.write(image_bytes)

    with Image.open(final_place) as img:
        img = ImageOps.exif_transpose(img)
        img.thumbnail((150, 150))
        img.save(final_place)
        thumbnail_width = img.width
        thumbnail_height = img.height

    return File(file_name=new_filename + file_extension, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place,
                source_url=filename)
2023-11-30 20:57:51 +13:00
2023-12-09 22:14:16 +13:00
def _link_thumbnail(link_url):
    """Return a thumbnail File for a link post's URL, or None when no usable image is found."""
    valid_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
    unused, file_extension = os.path.splitext(link_url)  # do not use _ here instead of 'unused'
    if file_extension.lower() in valid_extensions:
        # this url is a link to an image - generate a thumbnail of it
        return url_to_thumbnail_file(link_url)

    # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
    opengraph = opengraph_parse(link_url)
    filename = opengraph.get('og:image') if opengraph else None
    if not filename:  # missing, empty or None - nothing to thumbnail
        return None
    unused, file_extension = os.path.splitext(filename)
    if file_extension.lower() not in valid_extensions:
        return None
    file = url_to_thumbnail_file(filename)
    if file:
        file.alt_text = opengraph.get('og:title')
    return file


def _store_post_image(uploaded_file, file_ext, alt_text):
    """Save an already-validated upload, resize it, write a WebP thumbnail and return its File."""
    new_filename = gibberish(15)

    # set up the storage directory
    directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(directory)

    # save the file
    final_place = os.path.join(directory, new_filename + file_ext)
    final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
    uploaded_file.save(final_place)

    if file_ext.lower() == '.heic':
        register_heif_opener()

    # resize if necessary
    img = Image.open(final_place)
    img = ImageOps.exif_transpose(img)
    img_width = img.width
    img_height = img.height
    if img.width > 2000 or img.height > 2000:
        img.thumbnail((2000, 2000))
        img.save(final_place)
        img_width = img.width
        img_height = img.height

    # save a second, smaller, version as a thumbnail
    img.thumbnail((256, 256))
    img.save(final_place_thumbnail, format="WebP", quality=93)
    thumbnail_width = img.width
    thumbnail_height = img.height

    return File(file_path=final_place, file_name=new_filename + file_ext, alt_text=alt_text,
                width=img_width, height=img_height, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place_thumbnail,
                source_url=final_place.replace('app/static/', f"https://{current_app.config['SERVER_NAME']}/static/"))


def save_post(form, post: Post):
    """Copy the submitted form into ``post`` and attach any image it needs.

    ``form.type.data`` selects the kind of post:
    ``''``/``'discussion'`` (article), ``'link'``, ``'image'`` or ``'poll'``
    (accepted but nothing is stored for it here). For links whose URL is new
    or changed, the previous image is removed and a thumbnail is generated from
    the URL itself or from the page's ``og:image``. For images, the upload in
    ``request.files['image_file']`` is validated, stored and thumbnailed.
    A post without an id gets its initial score/ranking from the current
    user's reputation and is added to the session. Nothing is committed here.

    :param form: submitted post form.
    :param post: new or existing Post to populate.
    :raises Exception: for an unknown ``form.type.data``.

    Aborts with HTTP 400 when an uploaded image has a disallowed or
    mismatching extension.
    """
    post.nsfw = form.nsfw.data
    post.nsfl = form.nsfl.data
    post.notify_author = form.notify_author.data

    if form.type.data == '' or form.type.data == 'discussion':
        post.title = form.discussion_title.data
        post.body = form.discussion_body.data
        post.body_html = markdown_to_html(post.body)
        post.type = POST_TYPE_ARTICLE
    elif form.type.data == 'link':
        post.title = form.link_title.data
        post.body = form.link_body.data
        post.body_html = markdown_to_html(post.body)
        # must be evaluated before post.url is overwritten below
        url_changed = post.id is None or form.link_url.data != post.url
        post.url = form.link_url.data
        post.type = POST_TYPE_LINK
        domain = domain_from_url(form.link_url.data)
        # NOTE(review): incremented on every save, including edits that keep the same url - confirm intended
        domain.post_count += 1
        post.domain = domain
        if url_changed:
            if post.image_id:
                remove_old_file(post.image_id)
                post.image_id = None
            file = _link_thumbnail(form.link_url.data)
            if file:
                post.image = file
                db.session.add(file)
    elif form.type.data == 'image':
        post.title = form.image_title.data
        post.body = form.image_body.data
        post.body_html = markdown_to_html(post.body)
        post.type = POST_TYPE_IMAGE
        uploaded_file = request.files['image_file']
        if uploaded_file and uploaded_file.filename != '':
            # check if this is an allowed type of file - before the old image is
            # removed, so a rejected upload does not destroy the existing one
            file_ext = os.path.splitext(uploaded_file.filename)[1]
            if file_ext.lower() not in allowed_extensions or file_ext.lower() != validate_image(uploaded_file.stream):
                abort(400)

            if post.image_id:
                remove_old_file(post.image_id)
                post.image_id = None

            file = _store_post_image(uploaded_file, file_ext, form.image_title.data)
            post.image = file
            db.session.add(file)
    elif form.type.data == 'poll':
        ...
    else:
        raise Exception('invalid post type')

    if post.id is None:
        if current_user.reputation > 100:
            post.up_votes = 1
            post.score = 1
        if current_user.reputation < -100:
            post.score = -1
        post.ranking = post_ranking(post.score, utcnow())
        db.session.add(post)

    g.site.last_active = utcnow()
2023-11-30 20:57:51 +13:00
def remove_old_file(file_id):
    """Call ``delete_from_disk()`` on the File with primary key ``file_id``.

    Does nothing when no such File row exists. The database row itself is
    not deleted here.
    """
    remove_file = File.query.get(file_id)
    if remove_file is None:
        # stale id - nothing on record to delete
        return
    remove_file.delete_from_disk()
2023-12-08 17:13:38 +13:00
2023-12-25 21:44:10 +13:00
def save_icon_file(icon_file, directory='communities') -> File:
    """Validate and store an uploaded icon and return its File record.

    The image is saved under ``app/static/media/<directory>/xx/yy/`` (the two
    sub-folders come from the first four characters of a random name), shrunk
    in place to fit 250x250 when larger, and a WebP thumbnail bounded by
    40x40 is written next to it.

    :param icon_file: uploaded file; its ``filename``, ``stream`` and ``save()`` are used.
    :param directory: media sub-directory; also the prefix of the alt text
        (``'<directory> icon'``).
    :return: the new File, added to ``db.session`` but not committed.

    Aborts with HTTP 400 when the extension is not in ``allowed_extensions``
    or differs from what ``validate_image`` reports.
    """
    # check if this is an allowed type of file
    file_ext = os.path.splitext(icon_file.filename)[1]
    if file_ext.lower() not in allowed_extensions or file_ext.lower() != validate_image(icon_file.stream):
        abort(400)
    new_filename = gibberish(15)

    # set up the storage directory (kept in its own variable so that the
    # ``directory`` argument is still available for the alt text below)
    storage_dir = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(storage_dir)

    # save the file
    final_place = os.path.join(storage_dir, new_filename + file_ext)
    final_place_thumbnail = os.path.join(storage_dir, new_filename + '_thumbnail.webp')
    icon_file.save(final_place)

    if file_ext.lower() == '.heic':
        register_heif_opener()

    # resize if necessary
    img = Image.open(final_place)
    img = ImageOps.exif_transpose(img)
    img_width = img.width
    img_height = img.height
    if img.width > 250 or img.height > 250:
        img.thumbnail((250, 250))
        img.save(final_place)
        img_width = img.width
        img_height = img.height

    # save a second, smaller, version as a thumbnail
    img.thumbnail((40, 40))
    img.save(final_place_thumbnail, format="WebP", quality=93)
    thumbnail_width = img.width
    thumbnail_height = img.height

    file = File(file_path=final_place, file_name=new_filename + file_ext, alt_text=f'{directory} icon',
                width=img_width, height=img_height, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place_thumbnail)
    db.session.add(file)
    return file
2023-12-25 21:44:10 +13:00
def save_banner_file(banner_file, directory='communities') -> File:
    """Validate and store an uploaded banner and return its File record.

    The image is saved under ``app/static/media/<directory>/xx/yy/`` (the two
    sub-folders come from the first four characters of a random name), shrunk
    in place to fit 1600x600 when larger, and a WebP thumbnail bounded by
    700x500 is written next to it.

    :param banner_file: uploaded file; its ``filename``, ``stream`` and ``save()`` are used.
    :param directory: media sub-directory; also the prefix of the alt text
        (``'<directory> banner'``).
    :return: the new File, added to ``db.session`` but not committed.

    Aborts with HTTP 400 when the extension is not in ``allowed_extensions``
    or differs from what ``validate_image`` reports.
    """
    # check if this is an allowed type of file
    file_ext = os.path.splitext(banner_file.filename)[1]
    if file_ext.lower() not in allowed_extensions or file_ext.lower() != validate_image(banner_file.stream):
        abort(400)
    new_filename = gibberish(15)

    # set up the storage directory (kept in its own variable so that the
    # ``directory`` argument is still available for the alt text below)
    storage_dir = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(storage_dir)

    # save the file
    final_place = os.path.join(storage_dir, new_filename + file_ext)
    final_place_thumbnail = os.path.join(storage_dir, new_filename + '_thumbnail.webp')
    banner_file.save(final_place)

    if file_ext.lower() == '.heic':
        register_heif_opener()

    # resize if necessary
    img = Image.open(final_place)
    img = ImageOps.exif_transpose(img)
    img_width = img.width
    img_height = img.height
    if img.width > 1600 or img.height > 600:
        img.thumbnail((1600, 600))
        img.save(final_place)
        img_width = img.width
        img_height = img.height

    # save a second, smaller, version as a thumbnail
    img.thumbnail((700, 500))
    img.save(final_place_thumbnail, format="WebP", quality=93)
    thumbnail_width = img.width
    thumbnail_height = img.height

    # thumbnail_path is recorded so the thumbnail written above can be found again
    file = File(file_path=final_place, file_name=new_filename + file_ext, alt_text=f'{directory} banner',
                width=img_width, height=img_height, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place_thumbnail)
    db.session.add(file)
    return file
2024-01-03 16:29:58 +13:00
# NB this always signs POSTs as the community so is only suitable for Announce activities
def send_to_remote_instance(instance_id: int, community_id: int, payload):
    """Hand ``payload`` to ``send_to_remote_instance_task`` for delivery.

    In debug mode the task function is called directly; otherwise it is
    queued through its ``.delay`` method.
    """
    if current_app.debug:
        deliver = send_to_remote_instance_task
    else:
        deliver = send_to_remote_instance_task.delay
    deliver(instance_id, community_id, payload)
2023-12-26 21:39:52 +13:00
@celery.task
def send_to_remote_instance_task(instance_id: int, community_id: int, payload):
    """POST ``payload`` to an instance's inbox and record the outcome on the Instance.

    ``post_request`` is given the community's private key and
    ``<ap_profile_id>#main-key`` as the key id. On success the failure counter
    is reset; on failure it is incremented, the next retry time is set to
    ``failures ** 4`` seconds from now, and after more than two failures the
    instance is marked dormant. Nothing happens when either the community or
    the instance cannot be found.

    :param instance_id: primary key of the target Instance.
    :param community_id: primary key of the Community the request is made for.
    :param payload: passed to ``post_request`` unchanged.
    """
    community = Community.query.get(community_id)
    if not community:
        return
    instance = Instance.query.get(instance_id)
    if instance is None:
        # the instance row is gone - there is no inbox to deliver to
        return

    if post_request(instance.inbox, payload, community.private_key, community.ap_profile_id + '#main-key'):
        instance.last_successful_send = utcnow()
        instance.failures = 0
    else:
        instance.failures += 1
        instance.most_recent_attempt = utcnow()
        instance.start_trying_again = utcnow() + timedelta(seconds=instance.failures ** 4)
        if instance.failures > 2:
            instance.dormant = True
    db.session.commit()