2023-12-22 15:34:45 +13:00
from __future__ import annotations
2024-01-18 15:15:10 +13:00
import html
2023-08-05 21:24:10 +12:00
import os
2024-08-23 08:47:19 +00:00
from datetime import timedelta , datetime , timezone
2023-12-24 16:20:18 +13:00
from random import randint
2024-06-05 20:33:00 +12:00
from typing import Union , Tuple , List
2024-04-04 21:36:03 +13:00
2024-09-15 19:30:45 +12:00
import httpx
2024-04-04 21:36:03 +13:00
import redis
2024-05-26 15:53:17 +01:00
from flask import current_app , request , g , url_for , json
2024-02-10 19:58:34 +13:00
from flask_babel import _
2024-06-28 12:30:19 +08:00
from requests import JSONDecodeError
2024-06-05 20:33:00 +12:00
from sqlalchemy import text , func , desc
2024-11-14 20:16:09 +13:00
from sqlalchemy . exc import IntegrityError
2023-12-24 16:20:18 +13:00
from app import db , cache , constants , celery
2023-12-22 14:05:39 +13:00
from app . models import User , Post , Community , BannedInstances , File , PostReply , AllowedInstances , Instance , utcnow , \
2024-04-16 21:49:05 +12:00
PostVote , PostReplyVote , ActivityPubLog , Notification , Site , CommunityMember , InstanceRole , Report , Conversation , \
2024-11-02 16:02:29 +13:00
Language , Tag , Poll , PollChoice , UserFollower , CommunityBan , CommunityJoinRequest , NotificationSubscription , Licence
2024-05-31 22:05:44 +01:00
from app . activitypub . signature import signed_get_request , post_request
2023-08-10 21:13:37 +12:00
import time
from app . constants import *
2024-04-04 21:36:03 +13:00
from urllib . parse import urlparse , parse_qs
2023-12-24 16:20:18 +13:00
from PIL import Image , ImageOps
from io import BytesIO
2024-01-13 18:18:32 +13:00
import pytesseract
2023-08-05 21:24:10 +12:00
2024-03-27 16:02:04 +13:00
from app . utils import get_request , allowlist_html , get_setting , ap_datetime , markdown_to_html , \
2024-10-14 15:37:00 +13:00
is_image_url , domain_from_url , gibberish , ensure_directory_exists , head_request , \
shorten_string , remove_tracking_from_link , \
microblog_content_to_title , generate_image_from_video_url , is_video_url , \
2024-09-21 20:05:34 +00:00
notification_subscribers , communities_banned_from , actor_contains_blocked_words , \
2024-10-14 15:37:00 +13:00
html_to_text , add_to_modlog_activitypub , joined_communities , \
2024-11-27 15:29:22 +00:00
moderating_communities , get_task_session , is_video_hosting_site , opengraph_parse
2023-09-08 20:04:01 +12:00
2024-08-19 23:44:58 +00:00
from sqlalchemy import or_
2023-08-05 21:24:10 +12:00
def public_key():
    """Return the instance's public key with line breaks escaped for JSON-LD.

    If ``./public.pem`` does not exist yet, a 2048-bit RSA key pair is generated
    with ``openssl`` (writing ``private.pem`` and ``public.pem`` into the current
    working directory) before the public key is read.

    Returns:
        The PEM text of the public key with every newline replaced by the
        two-character sequence ``\\n``.
    """
    if not os.path.exists('./public.pem'):
        os.system('openssl genrsa -out private.pem 2048')
        os.system('openssl rsa -in private.pem -outform PEM -pubout -out public.pem')
    # Read the key in both cases: previously a freshly generated key was never
    # loaded, so the first call ended in an UnboundLocalError.
    with open('./public.pem', 'r') as pem_file:
        pem_text = pem_file.read()
    # JSON-LD doesn't want to work with linebreaks,
    # but needs the \n character to know where to break the line ;)
    return pem_text.replace('\n', '\\n')
2023-08-10 21:13:37 +12:00
2023-12-27 14:38:41 +13:00
def community_members(community_id):
    """Count the members of a community, leaving out banned or deleted users and banned memberships.

    Args:
        community_id: value bound to the ``:community_id`` query parameter.

    Returns:
        The scalar result of the COUNT query.
    """
    member_count_sql = (
        'SELECT COUNT(id) as c FROM "user" as u '
        'INNER JOIN community_member cm on u.id = cm.user_id '
        'WHERE u.banned is false AND u.deleted is false AND cm.is_banned is false and cm.community_id = :community_id'
    )
    bind_params = {'community_id': community_id}
    result = db.session.execute(text(member_count_sql), bind_params)
    return result.scalar()
2023-08-10 21:13:37 +12:00
def users_total():
    """Count users with no ``ap_id`` that are verified and neither banned nor deleted."""
    statement = text(
        'SELECT COUNT(id) as c FROM "user" WHERE ap_id is null AND verified is true AND banned is false AND deleted is false'
    )
    result = db.session.execute(statement)
    return result.scalar()
def active_half_year():
    """Count verified, non-banned, non-deleted users without an ``ap_id`` seen in the last 6 months."""
    statement = text(
        "SELECT COUNT(id) as c FROM \"user\" WHERE last_seen >= CURRENT_DATE - INTERVAL '6 months' "
        "AND ap_id is null AND verified is true AND banned is false AND deleted is false"
    )
    result = db.session.execute(statement)
    return result.scalar()
def active_month():
    """Count verified, non-banned, non-deleted users without an ``ap_id`` seen in the last month."""
    statement = text(
        "SELECT COUNT(id) as c FROM \"user\" WHERE last_seen >= CURRENT_DATE - INTERVAL '1 month' "
        "AND ap_id is null AND verified is true AND banned is false AND deleted is false"
    )
    result = db.session.execute(statement)
    return result.scalar()
2024-02-14 14:38:55 +13:00
def active_week():
    """Count verified, non-banned, non-deleted users without an ``ap_id`` seen in the last week."""
    statement = text(
        "SELECT COUNT(id) as c FROM \"user\" WHERE last_seen >= CURRENT_DATE - INTERVAL '1 week' "
        "AND ap_id is null AND verified is true AND banned is false AND deleted is false"
    )
    result = db.session.execute(statement)
    return result.scalar()
def active_day():
    """Count verified, non-banned, non-deleted users without an ``ap_id`` seen in the last day."""
    statement = text(
        "SELECT COUNT(id) as c FROM \"user\" WHERE last_seen >= CURRENT_DATE - INTERVAL '1 day' "
        "AND ap_id is null AND verified is true AND banned is false AND deleted is false"
    )
    result = db.session.execute(statement)
    return result.scalar()
2023-08-10 21:13:37 +12:00
def local_posts():
    """Count non-deleted posts whose ``instance_id`` is 1 (presumably the local instance, going by the name)."""
    statement = text('SELECT COUNT(id) as c FROM "post" WHERE instance_id = 1 AND deleted is false')
    result = db.session.execute(statement)
    return result.scalar()
2023-08-10 21:13:37 +12:00
def local_comments():
    """Count non-deleted post replies whose ``instance_id`` is 1 (presumably the local instance, going by the name)."""
    statement = text('SELECT COUNT(id) as c FROM "post_reply" WHERE instance_id = 1 and deleted is false')
    result = db.session.execute(statement)
    return result.scalar()
2023-08-10 21:13:37 +12:00
2024-05-10 09:14:33 +01:00
2024-02-14 14:38:55 +13:00
def local_communities():
    """Count communities whose ``instance_id`` is 1 (presumably the local instance, going by the name)."""
    statement = text('SELECT COUNT(id) as c FROM "community" WHERE instance_id = 1')
    result = db.session.execute(statement)
    return result.scalar()
2023-08-10 21:13:37 +12:00
def post_to_activity(post: Post, community: Community):
    """Wrap a post in a Create activity which is itself wrapped in an Announce by the community.

    Args:
        post: the post to serialise; its page representation comes from post_to_page().
        community: the community used as actor of the Announce and audience of the Create.

    Returns:
        A dict describing the Announce activity.
    """
    public_collection = "https://www.w3.org/ns/activitystreams#Public"

    # local PieFed posts do not have a create or announce id
    create_id = post.ap_create_id or f"https://{current_app.config['SERVER_NAME']}/activities/create/{gibberish(15)}"
    announce_id = post.ap_announce_id or f"https://{current_app.config['SERVER_NAME']}/activities/announce/{gibberish(15)}"

    community_url = community.public_url()

    create_activity = {
        "id": create_id,
        "actor": post.author.public_url(),
        "to": [public_collection],
        "object": post_to_page(post),
        "cc": [community_url],
        "type": "Create",
        "audience": community_url,
    }

    announce_activity = {
        "actor": community_url,
        "to": [public_collection],
        "object": create_activity,
        "cc": [f"{community_url}/followers"],
        "type": "Announce",
        "id": announce_id,
    }
    return announce_activity
2023-08-22 21:24:11 +12:00
2024-05-31 03:45:51 +01:00
def post_to_page(post: Post):
    """Serialise a post as an ActivityPub ``Page`` (or ``Question`` for polls).

    Args:
        post: the post to serialise.

    Returns:
        A dict holding the object. Link and video posts get a ``Link`` attachment,
        posts with an image get an ``image`` entry (plus an ``Image`` attachment for
        image posts), and poll posts get their choices under ``oneOf``/``anyOf``.
    """
    activity_data = {
        "type": "Page",
        "id": post.ap_id,
        "attributedTo": post.author.ap_public_url,
        "to": [
            post.community.public_url(),
            "https://www.w3.org/ns/activitystreams#Public"
        ],
        "name": post.title,
        "cc": [],
        "content": post.body_html if post.body_html else '',
        "mediaType": "text/html",
        "source": {"content": post.body if post.body else '', "mediaType": "text/markdown"},
        "attachment": [],
        "commentsEnabled": post.comments_enabled,
        "sensitive": post.nsfw or post.nsfl,
        "published": ap_datetime(post.created_at),
        "stickied": post.sticky,
        "audience": post.community.public_url(),
        "tag": post.tags_for_activitypub(),
        "replies": post_replies_for_ap(post.id),
        "language": {
            "identifier": post.language_code(),
            "name": post.language_name()
        },
    }
    if post.edited_at is not None:
        activity_data["updated"] = ap_datetime(post.edited_at)
    if post.type in (POST_TYPE_LINK, POST_TYPE_VIDEO) and post.url is not None:
        activity_data["attachment"] = [{"href": post.url, "type": "Link"}]
    if post.image_id is not None:
        activity_data["image"] = {"url": post.image.view_url(), "type": "Image"}
        # Only build the image attachment when an image is actually attached, so a
        # missing image can never be dereferenced.
        if post.type == POST_TYPE_IMAGE:
            activity_data['attachment'] = [{'type': 'Image',
                                            'url': post.image.source_url,
                                            'name': post.image.alt_text}]
    if post.type == POST_TYPE_POLL:
        poll = Poll.query.filter_by(post_id=post.id).first()
        # A poll post without its Poll row would otherwise crash on poll.mode;
        # in that case the post is emitted as a plain Page.
        if poll is not None:
            activity_data['type'] = 'Question'
            mode = 'oneOf' if poll.mode == 'single' else 'anyOf'
            poll_choices = PollChoice.query.filter_by(post_id=post.id).order_by(PollChoice.sort_order).all()
            activity_data[mode] = [
                {
                    "type": "Note",
                    "name": choice.choice_text,
                    "replies": {
                        "type": "Collection",
                        "totalItems": choice.num_votes
                    }
                }
                for choice in poll_choices
            ]
            activity_data['endTime'] = ap_datetime(poll.end_poll)
            activity_data['votersCount'] = poll.total_votes()
    if post.indexable:
        activity_data['searchableBy'] = 'https://www.w3.org/ns/activitystreams#Public'
    return activity_data
2024-06-05 20:33:00 +12:00
def post_replies_for_ap(post_id: int) -> List[dict]:
    """Serialise up to 2000 non-deleted replies of a post, newest ``posted_at`` first.

    Args:
        post_id: id of the post whose replies are wanted.

    Returns:
        One dict per reply, as produced by comment_model_to_json().
    """
    reply_query = PostReply.query.filter_by(post_id=post_id, deleted=False)
    reply_query = reply_query.order_by(desc(PostReply.posted_at)).limit(2000)
    serialised = []
    for reply in reply_query:
        serialised.append(comment_model_to_json(reply))
    return serialised
def comment_model_to_json(reply: PostReply) -> dict:
    """Serialise a reply as an ActivityPub ``Note``.

    Args:
        reply: the reply to serialise.

    Returns:
        A dict holding the Note. For deleted replies the content and markdown source
        are replaced by a notice saying whether the author or a moderator deleted it.
    """
    markdown_source = {'content': reply.body, 'mediaType': 'text/markdown'}
    language = {
        'identifier': reply.language_code(),
        'name': reply.language_name()
    }
    reply_data = {
        "@context": [
            "https://www.w3.org/ns/activitystreams",
            "https://w3id.org/security/v1",
        ],
        "type": "Note",
        "id": reply.ap_id,
        "attributedTo": reply.author.public_url(),
        "inReplyTo": reply.in_reply_to(),
        "to": [
            "https://www.w3.org/ns/activitystreams#Public",
            reply.to()
        ],
        "cc": [
            reply.community.public_url(),
            reply.author.followers_url()
        ],
        'content': reply.body_html,
        'mediaType': 'text/html',
        'source': markdown_source,
        'published': ap_datetime(reply.created_at),
        'distinguished': False,
        'audience': reply.community.public_url(),
        'language': language
    }

    if reply.edited_at:
        reply_data['updated'] = ap_datetime(reply.edited_at)

    if not reply.deleted:
        return reply_data

    # Deleted replies keep their envelope but expose only who removed them.
    removed_by = 'author' if reply.deleted_by == reply.user_id else 'moderator'
    reply_data['content'] = f'<p>Deleted by {removed_by}</p>'
    reply_data['source']['content'] = f'Deleted by {removed_by}'
    return reply_data
2024-06-05 20:33:00 +12:00
2023-08-22 21:24:11 +12:00
def banned_user_agents():
    """Return the banned user agents; currently always a new empty list."""
    # todo: finish this function
    no_agents = list()
    return no_agents
2023-08-22 21:24:11 +12:00
2023-09-17 21:19:51 +12:00
@cache.memoize(150)
def instance_blocked(host: str) -> bool:        # see also utils.instance_banned()
    """Tell whether a host has a row in BannedInstances.

    Args:
        host: a bare domain or an http(s) URL; case and surrounding whitespace are ignored.

    Returns:
        True when the host is banned. Also True when no usable host can be determined
        (None, empty, or a URL without a hostname).
    """
    if host is None:
        return True
    # Strip before parsing: urlparse() does not recognise a scheme behind leading whitespace.
    host = host.strip().lower()
    if host == '':
        return True
    if 'https://' in host or 'http://' in host:
        host = urlparse(host).hostname
        # hostname is None for URLs such as 'https://'; treat that like an empty host
        # rather than crashing on None.
        if not host:
            return True
    instance = BannedInstances.query.filter_by(domain=host).first()
    return instance is not None
2023-11-03 20:32:12 +13:00
@cache.memoize(150)
def instance_allowed(host: str) -> bool:
    """Tell whether a host has a row in AllowedInstances.

    Args:
        host: a bare domain or an http(s) URL; case and surrounding whitespace are ignored.

    Returns:
        True when the host is on the allow list, and also for a None or empty host
        (the pre-existing behaviour). False when a URL carries no hostname at all,
        since no allow-list row can match it.
    """
    if host is None or host == '':
        return True
    # Strip before parsing: urlparse() does not recognise a scheme behind leading whitespace.
    host = host.strip().lower()
    if 'https://' in host or 'http://' in host:
        host = urlparse(host).hostname
        # hostname is None for URLs such as 'https://'; nothing can be looked up then.
        if not host:
            return False
    instance = AllowedInstances.query.filter_by(domain=host).first()
    return instance is not None
2024-11-25 12:46:34 +00:00
def _instance_is_rejected(server: str) -> bool:
    """Apply the allow list (when enabled) or else the block list to a server name."""
    if get_setting('use_allowlist', False):
        return not instance_allowed(server)
    return instance_blocked(server)


def _split_acct_handle(handle: str) -> Tuple[str, str]:
    """Split a ``name@server`` handle into ``(server, address)``; ``('', '')`` if it is not one."""
    handle = handle.strip()
    if handle.startswith('acct:'):
        handle = handle[len('acct:'):]
    address, _sep, server = handle.lstrip('@').partition('@')
    if not address or not server or '/' in server:
        return '', ''
    return server, address


def _actor_response_to_model(actor_data, address: str, server: str, community_only: bool):
    """Convert a 200 response carrying an actor document into a model, closing the response."""
    try:
        actor_json = actor_data.json()
    except Exception:
        return None
    finally:
        actor_data.close()
    actor_model = actor_json_to_model(actor_json, address, server)
    if community_only and not isinstance(actor_model, Community):
        return None
    return actor_model


def find_actor_or_create(actor: str, create_if_not_found=True, community_only=False) -> Union[User, Community, None]:
    """Look an actor up in the database and optionally create it from its home instance.

    Args:
        actor: the actor's profile URL (https://server/u/actor or https://server/c/actor),
            a dict with an ``id`` key holding that URL, or a ``name@server`` handle which
            is resolved through webfinger.
        create_if_not_found: when True, an unknown remote actor is fetched and turned into
            a model via actor_json_to_model().
        community_only: when True, anything that is not a Community is reported as None.

    Returns:
        The matching User or Community, or None when the actor is unknown, banned, deleted,
        hosted on a rejected instance, contains blocked words, or cannot be retrieved.

    Raises:
        httpx.HTTPError: when fetching a remote actor fails twice in a row.
    """
    if isinstance(actor, dict):     # Discourse does this
        actor = actor['id']
    actor_url = actor.strip()
    actor = actor.strip().lower()
    user = None
    server = ''
    address = ''
    # actor parameter must be formatted as https://server/u/actor or https://server/c/actor

    # Initially, check if the user exists in the local DB already
    if current_app.config['SERVER_NAME'] + '/c/' in actor:
        return Community.query.filter(Community.ap_profile_id == actor).first()  # finds communities formatted like https://localhost/c/*

    if current_app.config['SERVER_NAME'] + '/u/' in actor:
        alt_user_name = actor_url.rsplit('/', 1)[-1]
        user = User.query.filter(or_(User.ap_profile_id == actor, User.alt_user_name == alt_user_name)).filter_by(ap_id=None, banned=False).first()  # finds local users
        if user is None:
            return None
    elif actor.startswith('https://'):
        server, address = extract_domain_and_actor(actor)
        if _instance_is_rejected(server):
            return None
        if actor_contains_blocked_words(actor):
            return None
        user = User.query.filter(User.ap_profile_id == actor).first()  # finds users formatted like https://kbin.social/u/tables
        if user and (user.banned or user.deleted):
            return None
        if user is None:
            user = Community.query.filter(Community.ap_profile_id == actor).first()
            if user and user.banned:
                # Try to find a non-banned copy of the community. Sometimes duplicates happen and one copy is banned.
                user = Community.query.filter(Community.ap_profile_id == actor).filter(Community.banned == False).first()
                if user is None:    # no un-banned version of this community exists, only the banned one. So it was banned for being bad, not for being a duplicate.
                    return None

    if user is not None:
        if not user.is_local() and (user.ap_fetched_at is None or user.ap_fetched_at < utcnow() - timedelta(days=7)):
            # To reduce load on remote servers, refreshing the user profile happens after a delay of 1 to 10 seconds. Meanwhile, subsequent calls to
            # find_actor_or_create() which happen to be for the same actor might queue up refreshes of the same user. To avoid this, set a flag to
            # indicate that user is currently being refreshed.
            refresh_in_progress = cache.get(f'refreshing_{user.id}')
            if not refresh_in_progress:
                cache.set(f'refreshing_{user.id}', True, timeout=300)
                if isinstance(user, User):
                    refresh_user_profile(user.id)
                elif isinstance(user, Community):
                    refresh_community_profile(user.id)
                    # refresh_instance_profile(user.instance_id) # disable in favour of cron job - see app.cli.daily_maintenance()
        if community_only and not isinstance(user, Community):
            return None
        return user

    # User does not exist in the DB, it's going to need to be created from its remote home instance
    if not create_if_not_found:
        return None

    if actor.startswith('https://'):
        try:
            actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'})
        except httpx.HTTPError:
            time.sleep(randint(3, 10))
            # A second failure is allowed to propagate to the caller.
            actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'})
        if actor_data.status_code == 200:
            return _actor_response_to_model(actor_data, address, server, community_only)
        if actor_data.status_code == 401:
            # The remote instance wants the request signed; retry with the site key.
            actor_data.close()
            try:
                site = Site.query.get(1)
                actor_data = signed_get_request(actor_url, site.private_key,
                                                f"https://{current_app.config['SERVER_NAME']}/actor#main-key")
                if actor_data.status_code == 200:
                    return _actor_response_to_model(actor_data, address, server, community_only)
                actor_data.close()
            except Exception:
                return None
        else:
            actor_data.close()
        return None

    # retrieve user details via webfinger, etc
    # server and address were never set on this path before, which ended in a NameError;
    # derive them from a name@server handle and give up if the actor is not one.
    server, address = _split_acct_handle(actor)
    if not server:
        return None
    if _instance_is_rejected(server) or actor_contains_blocked_words(actor):
        return None
    try:
        webfinger_data = get_request(f"https://{server}/.well-known/webfinger",
                                     params={'resource': f"acct:{address}@{server}"})
    except httpx.HTTPError:
        time.sleep(randint(3, 10))
        webfinger_data = get_request(f"https://{server}/.well-known/webfinger",
                                     params={'resource': f"acct:{address}@{server}"})
    if webfinger_data.status_code != 200:
        webfinger_data.close()
        return None
    webfinger_json = webfinger_data.json()
    webfinger_data.close()
    for link in webfinger_json.get('links', []):
        if link.get('rel') != 'self' or 'href' not in link:
            continue
        # this contains the URL of the activitypub profile
        accept_type = link.get('type', 'application/activity+json')
        # retrieve the activitypub profile
        try:
            actor_data = get_request(link['href'], headers={'Accept': accept_type})
        except httpx.HTTPError:
            time.sleep(randint(3, 10))
            actor_data = get_request(link['href'], headers={'Accept': accept_type})
        # to see the structure of the json contained in actor_data, do a GET to https://lemmy.world/c/technology with header Accept: application/activity+json
        if actor_data.status_code == 200:
            return _actor_response_to_model(actor_data, address, server, community_only)
        actor_data.close()
    return None
2023-09-08 20:04:01 +12:00
2024-07-17 16:12:39 +08:00
def find_language(code: str) -> Language | None:
    """Return the first Language whose ``code`` equals *code*, or None if there is none."""
    language = Language.query.filter(Language.code == code).first()
    return language or None
2024-11-14 16:51:48 +13:00
def find_language_or_create(code: str, name: str, session=None) -> Language:
    """Fetch the Language with the given code, adding a new one to the session if it is missing.

    Args:
        code: language code to look up.
        name: name given to the Language when it has to be created.
        session: optional session to query and add through; ``db.session`` is used otherwise.

    Returns:
        The existing Language, or the newly added one (added, not committed, by this function).
    """
    if session:
        language: Language = session.query(Language).filter(Language.code == code).first()
    else:
        language = Language.query.filter(Language.code == code).first()

    if language:
        return language

    language = Language(code=code, name=name)
    target_session = session if session else db.session
    target_session.add(language)
    return language
2024-11-02 16:02:29 +13:00
def find_licence_or_create(name: str) -> Licence:
    """Fetch the Licence with the given (whitespace-stripped) name, adding a new one if missing.

    Args:
        name: licence name; surrounding whitespace is ignored.

    Returns:
        The existing Licence, or the newly added one (added to ``db.session``, not committed here).
    """
    licence_name = name.strip()
    licence = Licence.query.filter(Licence.name == licence_name).first()
    if not licence:
        licence = Licence(name=licence_name)
        db.session.add(licence)
    return licence
2024-05-11 13:45:04 +12:00
def find_hashtag_or_create(hashtag: str) -> Tag | None:
    """Fetch the Tag for a hashtag, adding a new one to ``db.session`` if it is missing.

    Args:
        hashtag: the hashtag text, with or without one leading ``#``. Tags are looked
            up by their lower-cased name.

    Returns:
        The existing or newly added Tag, or None when *hashtag* is None or has no
        text left after stripping whitespace and the leading ``#``.
    """
    if hashtag is None:
        return None

    # Strip before checking for emptiness: whitespace-only input used to reach
    # hashtag[0] as an empty string and raise IndexError.
    hashtag = hashtag.strip()
    if hashtag.startswith('#'):
        hashtag = hashtag[1:]
    if hashtag == '':
        # Also covers a bare '#', which would otherwise create a tag with an empty name.
        return None

    tag_name = hashtag.lower()
    existing_tag = Tag.query.filter(Tag.name == tag_name).first()
    if existing_tag:
        return existing_tag

    new_tag = Tag(name=tag_name, display_as=hashtag, post_count=1)
    db.session.add(new_tag)
    return new_tag
2023-09-08 20:04:01 +12:00
def extract_domain_and_actor(url_string: str):
    """Split an actor URL into its server and its final path segment.

    Args:
        url_string: a URL such as https://server/u/actor.

    Returns:
        A ``(server_domain, actor)`` tuple: the URL's network location and whatever
        follows the last ``/`` of its path.
    """
    parts = urlparse(url_string)
    # Everything after the final '/' of the path is the actor name.
    _head, _slash, actor = parts.path.rpartition('/')
    return parts.netloc, actor
2023-12-29 17:32:35 +13:00
def user_removed_from_remote_server(actor_url, is_piefed=False):
    """Ask the actor's home server whether the actor is gone.

    Args:
        actor_url: URL of the actor to probe.
        is_piefed: when True a HEAD request is used instead of a GET.

    Returns:
        True when the server answers 404 or 410, and also when the request fails
        for any reason (the pre-existing best-effort behaviour); False otherwise.
    """
    response = None
    try:
        fetch = head_request if is_piefed else get_request
        response = fetch(actor_url, headers={'Accept': 'application/activity+json'})
        return response.status_code in (404, 410)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed.
        return True
    finally:
        # Compare against None explicitly: response truthiness may reflect the HTTP
        # status, which would leave exactly the 404/410 responses unclosed.
        if response is not None:
            response.close()
2023-12-27 15:47:17 +13:00
def refresh_user_profile(user_id):
    """Refresh a user's profile: inline when the app is in debug mode, otherwise as a task delayed by 1-10 seconds."""
    if not current_app.debug:
        delay_seconds = randint(1, 10)
        refresh_user_profile_task.apply_async(args=(user_id,), countdown=delay_seconds)
        return
    refresh_user_profile_task(user_id)
2023-12-27 15:47:17 +13:00
@celery.task
def refresh_user_profile_task(user_id):
    """Re-fetch a remote user's actor document and update the stored profile.

    Nothing happens unless the user exists, has an ``instance_id`` and that instance
    reports itself online. The actor document is fetched (one retry after an HTTP
    error, a signed request after any other failure) and, on a 200 response, the
    user's names, about text, bot flag, public key, indexable flag, avatar and cover
    are updated and committed. Resized images are regenerated when avatar or cover
    changed.

    Args:
        user_id: primary key of the User to refresh.
    """
    session = get_task_session()
    # try/finally so the task session is released on every path; the early returns
    # below used to skip session.close().
    try:
        user: User = session.query(User).get(user_id)
        if not (user and user.instance_id and user.instance.online()):
            return

        try:
            actor_data = get_request(user.ap_public_url, headers={'Accept': 'application/activity+json'})
        except httpx.HTTPError:
            time.sleep(randint(3, 10))
            try:
                actor_data = get_request(user.ap_public_url, headers={'Accept': 'application/activity+json'})
            except httpx.HTTPError:
                return
        except Exception:
            # Any other failure: fall back to a request signed with the site key.
            try:
                site = session.query(Site).get(1)
                actor_data = signed_get_request(user.ap_public_url, site.private_key,
                                                f"https://{current_app.config['SERVER_NAME']}/actor#main-key")
            except Exception:
                return

        if actor_data.status_code != 200:
            actor_data.close()
            return

        activity_json = actor_data.json()
        actor_data.close()

        # update indexible state on their posts, if necessary
        new_indexable = activity_json['indexable'] if 'indexable' in activity_json else True
        if new_indexable != user.indexable:
            session.execute(text('UPDATE "post" set indexable = :indexable WHERE user_id = :user_id'),
                            {'user_id': user.id,
                             'indexable': new_indexable})

        user.user_name = activity_json['preferredUsername'].strip()
        if 'name' in activity_json:
            user.title = activity_json['name'].strip() if activity_json['name'] else ''
        if 'summary' in activity_json:
            about_html = activity_json['summary']
            if about_html is not None and not about_html.startswith('<'):   # PeerTube
                about_html = '<p>' + about_html + '</p>'
            user.about_html = allowlist_html(about_html)
        else:
            user.about_html = ''
        if 'source' in activity_json and activity_json['source'].get('mediaType') == 'text/markdown':
            user.about = activity_json['source']['content']
            user.about_html = markdown_to_html(user.about)          # prefer Markdown if provided, overwrite version obtained from HTML
        else:
            user.about = html_to_text(user.about_html)
        if 'type' in activity_json:
            user.bot = True if activity_json['type'] == 'Service' else False
        user.ap_fetched_at = utcnow()
        user.public_key = activity_json['publicKey']['publicKeyPem']
        user.indexable = new_indexable

        avatar_changed = cover_changed = False
        if 'icon' in activity_json and activity_json['icon'] is not None:
            # 'icon' is either a single object or a list of them; for a list the last entry is used.
            if isinstance(activity_json['icon'], dict) and 'url' in activity_json['icon']:
                icon_entry = activity_json['icon']['url']
            elif isinstance(activity_json['icon'], list) and 'url' in activity_json['icon'][-1]:
                icon_entry = activity_json['icon'][-1]['url']
            else:
                icon_entry = None
            if icon_entry:
                if user.avatar_id and icon_entry != user.avatar.source_url:
                    user.avatar.delete_from_disk()
                if not user.avatar_id or (user.avatar_id and icon_entry != user.avatar.source_url):
                    avatar = File(source_url=icon_entry)
                    user.avatar = avatar
                    session.add(avatar)
                    avatar_changed = True
        if 'image' in activity_json and activity_json['image'] is not None:
            if user.cover_id and activity_json['image']['url'] != user.cover.source_url:
                user.cover.delete_from_disk()
            if not user.cover_id or (user.cover_id and activity_json['image']['url'] != user.cover.source_url):
                cover = File(source_url=activity_json['image']['url'])
                user.cover = cover
                session.add(cover)
                cover_changed = True
        user.recalculate_post_stats()
        session.commit()

        # Resized copies and memoized image URLs are only rebuilt after the commit.
        if user.avatar_id and avatar_changed:
            make_image_sizes(user.avatar_id, 40, 250, 'users')
            cache.delete_memoized(User.avatar_image, user)
            cache.delete_memoized(User.avatar_thumbnail, user)
        if user.cover_id and cover_changed:
            make_image_sizes(user.cover_id, 700, 1600, 'users')
            cache.delete_memoized(User.cover_image, user)
    finally:
        session.close()
2023-12-27 15:47:17 +13:00
2024-02-27 05:19:52 +13:00
def refresh_community_profile(community_id):
    """Refresh a community's profile: inline when the app is in debug mode, otherwise as a task delayed by 1-10 seconds."""
    if not current_app.debug:
        delay_seconds = randint(1, 10)
        refresh_community_profile_task.apply_async(args=(community_id,), countdown=delay_seconds)
        return
    refresh_community_profile_task(community_id)
2024-02-27 05:19:52 +13:00
@celery.task
def refresh_community_profile_task(community_id):
    """Re-fetch a remote community's actor document and update the local Community.

    Nothing is done unless the community exists, is not local and its instance
    reports itself online. The actor JSON is requested from ``ap_public_url``
    (retried once, after a short random sleep, on ``httpx.HTTPError``). On a 200
    response the community's flags, title, public key, description, rules, icon,
    cover image and languages are updated and committed, image renditions are
    requested, and the moderator list is synchronised from ``ap_moderators_url``.

    The task session is always closed before returning, including on the early
    exits and when an exception propagates.

    :param community_id: primary key of the Community to refresh.
    """
    session = get_task_session()
    try:
        community: Community = session.query(Community).get(community_id)
        if not community or not community.instance.online() or community.is_local():
            return

        try:
            actor_data = get_request(community.ap_public_url, headers={'Accept': 'application/activity+json'})
        except httpx.HTTPError:
            time.sleep(randint(3, 10))
            try:
                actor_data = get_request(community.ap_public_url, headers={'Accept': 'application/activity+json'})
            except Exception:
                return

        # Close the response whatever the status code or JSON parsing outcome.
        try:
            if actor_data.status_code != 200:
                return
            activity_json = actor_data.json()
        finally:
            actor_data.close()

        if 'attributedTo' in activity_json and isinstance(activity_json['attributedTo'], str):  # lemmy and mbin
            mods_url = activity_json['attributedTo']
        elif 'moderators' in activity_json:  # kbin
            mods_url = activity_json['moderators']
        else:
            mods_url = None

        community.nsfw = activity_json.get('sensitive', False)
        if activity_json.get('nsfl'):
            community.nsfl = activity_json['nsfl']
        community.title = activity_json['name'].strip()
        community.restricted_to_mods = activity_json.get('postingRestrictedToMods', False)
        community.new_mods_wanted = activity_json.get('newModsWanted', False)
        community.private_mods = activity_json.get('privateMods', False)
        community.ap_moderators_url = mods_url
        community.ap_fetched_at = utcnow()
        community.public_key = activity_json['publicKey']['publicKeyPem']

        if 'summary' in activity_json:
            description_html = activity_json['summary']
        elif 'content' in activity_json:
            description_html = activity_json['content']
        else:
            description_html = ''

        if description_html is not None and description_html != '':
            if not description_html.startswith('<'):  # PeerTube
                description_html = '<p>' + description_html + '</p>'
            community.description_html = allowlist_html(description_html)
            if 'source' in activity_json and activity_json['source'].get('mediaType') == 'text/markdown':
                community.description = activity_json['source']['content']
                # prefer Markdown if provided, overwrite version obtained from HTML
                community.description_html = markdown_to_html(community.description)
            else:
                community.description = html_to_text(community.description_html)

        if 'rules' in activity_json:
            community.rules_html = allowlist_html(activity_json['rules'])
            community.rules = html_to_text(community.rules_html)

        icon_changed = cover_changed = False
        if 'icon' in activity_json:
            icon_json = activity_json['icon']
            if isinstance(icon_json, dict) and 'url' in icon_json:
                icon_entry = icon_json['url']
            elif isinstance(icon_json, list) and icon_json and 'url' in icon_json[-1]:  # empty list: no icon
                icon_entry = icon_json[-1]['url']
            else:
                icon_entry = None
            if icon_entry:
                if community.icon_id and icon_entry != community.icon.source_url:
                    community.icon.delete_from_disk()
                if not community.icon_id or icon_entry != community.icon.source_url:
                    icon = File(source_url=icon_entry)
                    community.icon = icon
                    session.add(icon)
                    icon_changed = True
        if 'image' in activity_json:
            image_json = activity_json['image']
            if isinstance(image_json, dict) and 'url' in image_json:
                image_entry = image_json['url']
            elif isinstance(image_json, list) and image_json and 'url' in image_json[0]:  # empty list: no cover
                image_entry = image_json[0]['url']
            else:
                image_entry = None
            if image_entry:
                if community.image_id and image_entry != community.image.source_url:
                    community.image.delete_from_disk()
                if not community.image_id or image_entry != community.image.source_url:
                    image = File(source_url=image_entry)
                    community.image = image
                    session.add(image)
                    cover_changed = True

        if 'language' in activity_json and isinstance(activity_json['language'], list) and not community.ignore_remote_language:
            for ap_language in activity_json['language']:
                new_language = find_language_or_create(ap_language['identifier'], ap_language['name'], session)
                if new_language not in community.languages:
                    community.languages.append(new_language)

        instance = session.query(Instance).get(community.instance_id)
        if instance and instance.software == 'peertube':
            community.restricted_to_mods = True
        session.commit()

        if community.icon_id and icon_changed:
            make_image_sizes(community.icon_id, 60, 250, 'communities')
        if community.image_id and cover_changed:
            make_image_sizes(community.image_id, 700, 1600, 'communities')

        if community.ap_moderators_url:
            mods_request = get_request(community.ap_moderators_url, headers={'Accept': 'application/activity+json'})
            try:
                mods_data = mods_request.json() if mods_request.status_code == 200 else None
            finally:
                mods_request.close()

            if mods_data and mods_data['type'] == 'OrderedCollection' and 'orderedItems' in mods_data:
                for actor in mods_data['orderedItems']:
                    time.sleep(0.5)
                    user = find_actor_or_create(actor)
                    if user:
                        existing_membership = CommunityMember.query.filter_by(community_id=community.id,
                                                                              user_id=user.id).first()
                        if existing_membership:
                            existing_membership.is_moderator = True
                        else:
                            new_membership = CommunityMember(community_id=community.id, user_id=user.id,
                                                             is_moderator=True)
                            db.session.add(new_membership)
                        db.session.commit()

                # Remove people who are no longer mods
                current_mods = {actor.lower() for actor in mods_data['orderedItems']}
                for member in CommunityMember.query.filter_by(community_id=community.id, is_moderator=True).all():
                    member_user = User.query.get(member.user_id)
                    if member_user is None:
                        # No matching User row, so there is no profile id to compare against
                        continue
                    if member_user.profile_id().lower() not in current_mods:
                        db.session.query(CommunityMember).filter_by(community_id=community.id,
                                                                    user_id=member_user.id,
                                                                    is_moderator=True).delete()
                        db.session.commit()
    finally:
        session.close()
2024-02-27 05:19:52 +13:00
2023-12-21 22:14:43 +13:00
def _actor_media_url(media, list_index=-1, allow_str=True):
    """Return the URL carried by an actor's ``icon``/``image`` property, or None.

    ``media`` may be a dict with a ``url`` key, a list of such dicts (the entry at
    ``list_index`` is used) or, when ``allow_str`` is true, a bare URL string.
    Empty lists and entries of any other shape yield None instead of raising.
    """
    if isinstance(media, dict):
        return media.get('url')
    if isinstance(media, list):
        if media and isinstance(media[list_index], dict):
            return media[list_index].get('url')
        return None
    if allow_str and isinstance(media, str):
        return media
    return None


def actor_json_to_model(activity_json, address, server):
    """Create and persist a User or Community from a remote actor document.

    ``Person`` and ``Service`` actors become a User (``Service`` sets ``bot``);
    ``Group`` actors become a Community. Any other type returns None.

    :param activity_json: the actor document, as a dict.
    :param address: the local part of the actor's address; for communities a
        leading ``!`` is stripped when building ``ap_id``.
    :param server: the actor's domain.
    :return: the new User/Community; the already stored row when the insert hits
        an IntegrityError; or None when a required key is missing or when an
        NSFW/NSFL community is not permitted by the site settings.
    """
    if activity_json['type'] == 'Person' or activity_json['type'] == 'Service':
        try:
            user = User(user_name=activity_json['preferredUsername'].strip(),
                        title=activity_json['name'].strip() if 'name' in activity_json and activity_json['name'] else None,
                        email=f"{address}@{server}",
                        matrix_user_id=activity_json.get('matrixUserId', ''),
                        indexable=activity_json.get('indexable', True),
                        searchable=activity_json.get('discoverable', True),
                        created=activity_json['published'] if 'published' in activity_json else utcnow(),
                        ap_id=f"{address.lower()}@{server.lower()}",
                        ap_public_url=activity_json['id'],
                        ap_profile_id=activity_json['id'].lower(),
                        ap_inbox_url=activity_json['endpoints']['sharedInbox'] if 'endpoints' in activity_json else activity_json['inbox'] if 'inbox' in activity_json else '',
                        ap_followers_url=activity_json.get('followers'),
                        ap_preferred_username=activity_json['preferredUsername'],
                        ap_manually_approves_followers=activity_json.get('manuallyApprovesFollowers', False),
                        ap_fetched_at=utcnow(),
                        ap_domain=server,
                        public_key=activity_json['publicKey']['publicKeyPem'],
                        bot=activity_json['type'] == 'Service',
                        instance_id=find_instance_id(server)
                        # language=community_json['language'][0]['identifier'] # todo: language
                        )
        except KeyError:
            current_app.logger.error(f'KeyError for {address}@{server} while parsing ' + str(activity_json))
            return None

        if 'summary' in activity_json:
            about_html = activity_json['summary']
            if about_html is not None and not about_html.startswith('<'):  # PeerTube
                about_html = '<p>' + about_html + '</p>'
            user.about_html = allowlist_html(about_html)
        else:
            user.about_html = ''
        if isinstance(activity_json.get('source'), dict) and activity_json['source'].get('mediaType') == 'text/markdown':
            user.about = activity_json['source']['content']
            # prefer Markdown if provided, overwrite version obtained from HTML
            user.about_html = markdown_to_html(user.about)
        else:
            user.about = html_to_text(user.about_html)

        icon_entry = _actor_media_url(activity_json.get('icon'))
        if icon_entry:
            avatar = File(source_url=icon_entry)
            user.avatar = avatar
            db.session.add(avatar)
        # Only a dict-shaped image is used for the cover; a bare string is ignored
        # rather than being indexed with ['url'].
        cover_json = activity_json.get('image')
        if isinstance(cover_json, dict) and 'url' in cover_json:
            cover = File(source_url=cover_json['url'])
            user.cover = cover
            db.session.add(cover)
        try:
            db.session.add(user)
            db.session.commit()
        except IntegrityError:
            db.session.rollback()
            return User.query.filter_by(ap_profile_id=activity_json['id'].lower()).one()
        if user.avatar_id:
            make_image_sizes(user.avatar_id, 40, 250, 'users')
        if user.cover_id:
            make_image_sizes(user.cover_id, 878, None, 'users')
        return user
    elif activity_json['type'] == 'Group':
        if 'attributedTo' in activity_json and isinstance(activity_json['attributedTo'], str):  # lemmy and mbin
            mods_url = activity_json['attributedTo']
        elif 'moderators' in activity_json:  # kbin
            mods_url = activity_json['moderators']
        else:
            mods_url = None

        # only allow nsfw communities if enabled for this instance
        site = Site.query.get(1)  # can't use g.site because actor_json_to_model can be called from celery
        if activity_json.get('sensitive') and not site.enable_nsfw:
            return None
        if activity_json.get('nsfl') and not site.enable_nsfl:
            return None
        try:
            community = Community(name=activity_json['preferredUsername'].strip(),
                                  title=activity_json['name'].strip(),
                                  nsfw=activity_json.get('sensitive', False),
                                  restricted_to_mods=activity_json.get('postingRestrictedToMods', False),
                                  new_mods_wanted=activity_json.get('newModsWanted', False),
                                  private_mods=activity_json.get('privateMods', False),
                                  created_at=activity_json['published'] if 'published' in activity_json else utcnow(),
                                  last_active=activity_json['updated'] if 'updated' in activity_json else utcnow(),
                                  ap_id=f"{address[1:].lower()}@{server.lower()}" if address.startswith('!') else f"{address}@{server}",
                                  ap_public_url=activity_json['id'],
                                  ap_profile_id=activity_json['id'].lower(),
                                  ap_followers_url=activity_json.get('followers'),
                                  ap_inbox_url=activity_json['endpoints']['sharedInbox'] if 'endpoints' in activity_json else activity_json['inbox'],
                                  ap_outbox_url=activity_json['outbox'],
                                  ap_featured_url=activity_json.get('featured', ''),
                                  ap_moderators_url=mods_url,
                                  ap_fetched_at=utcnow(),
                                  ap_domain=server,
                                  public_key=activity_json['publicKey']['publicKeyPem'],
                                  # language=community_json['language'][0]['identifier'] # todo: language
                                  instance_id=find_instance_id(server),
                                  low_quality='memes' in activity_json['preferredUsername']
                                  )
        except KeyError:
            # Same handling as the Person/Service branch: a malformed actor is logged and ignored
            current_app.logger.error(f'KeyError for {address}@{server} while parsing ' + str(activity_json))
            return None

        if 'summary' in activity_json:
            description_html = activity_json['summary']
        elif 'content' in activity_json:
            description_html = activity_json['content']
        else:
            description_html = ''

        if description_html is not None and description_html != '':
            if not description_html.startswith('<'):  # PeerTube
                description_html = '<p>' + description_html + '</p>'
            community.description_html = allowlist_html(description_html)
            if isinstance(activity_json.get('source'), dict) and activity_json['source'].get('mediaType') == 'text/markdown':
                community.description = activity_json['source']['content']
                # prefer Markdown if provided, overwrite version obtained from HTML
                community.description_html = markdown_to_html(community.description)
            else:
                community.description = html_to_text(community.description_html)
        if 'rules' in activity_json:
            community.rules_html = allowlist_html(activity_json['rules'])
            community.rules = html_to_text(community.rules_html)

        icon_entry = _actor_media_url(activity_json.get('icon'))
        if icon_entry:
            icon = File(source_url=icon_entry)
            community.icon = icon
            db.session.add(icon)
        # For the cover the first list entry is used and bare strings are not accepted
        image_entry = _actor_media_url(activity_json.get('image'), list_index=0, allow_str=False)
        if image_entry:
            image = File(source_url=image_entry)
            community.image = image
            db.session.add(image)
        if 'language' in activity_json and isinstance(activity_json['language'], list):
            for ap_language in activity_json['language']:
                community.languages.append(find_language_or_create(ap_language['identifier'], ap_language['name']))
        try:
            db.session.add(community)
            db.session.commit()
        except IntegrityError:
            db.session.rollback()
            return Community.query.filter_by(ap_profile_id=activity_json['id'].lower()).one()
        if community.icon_id:
            make_image_sizes(community.icon_id, 60, 250, 'communities')
        if community.image_id:
            make_image_sizes(community.image_id, 700, 1600, 'communities')
        return community
2024-03-15 22:32:48 +00:00
def post_json_to_model(activity_log, post_json, user, community) -> Post:
    """Create and persist a Post from a remote post's ActivityPub JSON.

    :param activity_log: log record for the incoming activity; ``result`` is set
        to ``'success'`` when the post is stored and ``exception_message`` is set
        when the linked domain is banned.
    :param post_json: the post object, as a dict.
    :param user: author of the post.
    :param community: community the post belongs to.
    :return: the stored Post, or None when the linked domain is banned or a
        required key is missing from ``post_json``.
    """
    try:
        nsfl_in_title = '[NSFL]' in post_json['name'].upper() or '(NSFL)' in post_json['name'].upper()
        post = Post(user_id=user.id, community_id=community.id,
                    title=html.unescape(post_json['name']),
                    comments_enabled=post_json.get('commentsEnabled', True),
                    sticky=post_json.get('stickied', False),
                    nsfw=post_json['sensitive'],
                    nsfl=post_json.get('nsfl', nsfl_in_title),
                    ap_id=post_json['id'],
                    type=constants.POST_TYPE_ARTICLE,
                    posted_at=post_json['published'],
                    last_active=post_json['published'],
                    instance_id=user.instance_id,
                    indexable=user.indexable
                    )
        if 'content' in post_json:
            if post_json['mediaType'] == 'text/html':
                post.body_html = allowlist_html(post_json['content'])
                if 'source' in post_json and post_json['source']['mediaType'] == 'text/markdown':
                    post.body = post_json['source']['content']
                    # prefer Markdown if provided, overwrite version obtained from HTML
                    post.body_html = markdown_to_html(post.body)
                else:
                    post.body = html_to_text(post.body_html)
            elif post_json['mediaType'] == 'text/markdown':
                post.body = post_json['content']
                post.body_html = markdown_to_html(post.body)
        if 'attachment' in post_json and len(post_json['attachment']) > 0 and 'type' in post_json['attachment'][0]:
            attachment = post_json['attachment'][0]
            alt_text = None
            if attachment['type'] == 'Link':
                post.url = attachment['href']  # Lemmy < 0.19.4
            if attachment['type'] == 'Image':
                post.url = attachment['url']  # PieFed, Lemmy >= 0.19.4
                if 'name' in attachment:
                    alt_text = attachment['name']
            if post.url:
                if is_image_url(post.url):
                    post.type = POST_TYPE_IMAGE
                    image = File(source_url=post.url)
                    if alt_text:
                        image.alt_text = alt_text
                    db.session.add(image)
                    post.image = image
                elif is_video_url(post.url):
                    post.type = POST_TYPE_VIDEO
                    image = File(source_url=post.url)
                    db.session.add(image)
                    post.image = image
                else:
                    post.type = POST_TYPE_LINK
                    post.url = remove_tracking_from_link(post.url)
                domain = domain_from_url(post.url)

                # notify about links to banned websites.
                already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
                if domain:
                    if domain.notify_mods:
                        # Use the community argument directly: the post has not been added to the
                        # session yet, so post.community may not be loadable at this point.
                        for community_member in community.moderators():
                            notify = Notification(title='Suspicious content', url=post.ap_id,
                                                  user_id=community_member.user_id, author_id=user.id)
                            db.session.add(notify)
                            already_notified.add(community_member.user_id)
                    if domain.notify_admins:
                        for admin in Site.admins():
                            if admin.id not in already_notified:
                                notify = Notification(title='Suspicious content', url=post.ap_id,
                                                      user_id=admin.id, author_id=user.id)
                                db.session.add(notify)
                                admin.unread_notifications += 1
                    if domain.banned:
                        post = None
                        activity_log.exception_message = domain.name + ' is blocked by admin'
                    else:
                        domain.post_count += 1
                        post.domain = domain

        if post is not None:
            if post_json['type'] == 'Video':
                post.type = POST_TYPE_VIDEO
                post.url = post_json['id']
                if 'icon' in post_json and isinstance(post_json['icon'], list):
                    icon = File(source_url=post_json['icon'][-1]['url'])
                    db.session.add(icon)
                    post.image = icon

            if 'language' in post_json:
                language = find_language_or_create(post_json['language']['identifier'], post_json['language']['name'])
                if language:
                    post.language_id = language.id
            if 'tag' in post_json:
                for json_tag in post_json['tag']:
                    if json_tag['type'] == 'Hashtag':
                        # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
                        if json_tag['name'][1:].lower() != community.name.lower():
                            hashtag = find_hashtag_or_create(json_tag['name'])
                            if hashtag:
                                post.tags.append(hashtag)

            if 'image' in post_json and post.image is None:
                image = File(source_url=post_json['image']['url'])
                db.session.add(image)
                post.image = image
            db.session.add(post)
            community.post_count += 1
            user.post_count += 1
            activity_log.result = 'success'
            db.session.commit()

            if post.image_id:
                make_image_sizes(post.image_id, 170, 512, 'posts')  # the 512 sized image is for masonry view

        return post
    except KeyError:
        current_app.logger.error('KeyError in post_json_to_model: ' + str(post_json))
        return None
2023-12-21 22:14:43 +13:00
2023-12-24 16:20:18 +13:00
# Save two different versions of a File, after downloading it from file.source_url. Set a width parameter to None to avoid generating one of that size
2024-08-23 12:28:10 +12:00
def make_image_sizes(file_id, thumbnail_width=50, medium_width=120, directory='posts', toxic_community=False):
    """Request the resized versions of a File, inline in debug mode or via Celery otherwise.

    :param file_id: primary key of the File to process.
    :param thumbnail_width: width of the thumbnail, or None to skip it.
    :param medium_width: width of the medium version, or None to skip it.
    :param directory: sub-directory name passed through to the worker.
    :param toxic_community: passed through to the worker unchanged.
    """
    task_args = (file_id, thumbnail_width, medium_width, directory, toxic_community)
    if not current_app.debug:
        # Delay by up to 10 seconds so servers do not experience a stampede of requests all in the same second
        make_image_sizes_async.apply_async(args=task_args, countdown=randint(1, 10))
        return
    make_image_sizes_async(*task_args)
2023-12-24 16:20:18 +13:00
@celery.task
def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory, toxic_community):
    """Download a File's source and store resized versions of it on disk.

    For ``.mp4``/``.webm`` sources a still image is generated from the video; for
    everything else the image is downloaded (falling back to the un-proxied URL
    when a Lemmy image proxy answers 404). A medium version and a WebP thumbnail
    are written under ``app/static/media/<directory>/`` and their paths and
    dimensions are recorded on the File. SVG images are left untouched.

    When ``toxic_community`` is set and the image is narrower than 2000px, the
    image text is OCR'd and a 'Review this' notification is created if it looks
    like an imageboard screenshot.

    The task session is always closed before returning.

    :param file_id: primary key of the File to process.
    :param thumbnail_width: width of the thumbnail, or None/0 to skip it.
    :param medium_width: width of the medium version, or None/0 to skip it.
    :param directory: sub-directory of ``app/static/media`` to store files in.
    :param toxic_community: whether to run the OCR check described above.
    """
    session = get_task_session()
    try:
        file: File = session.query(File).get(file_id)
        if not file or not file.source_url:
            return

        # Videos
        if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'):
            new_filename = gibberish(15)

            # set up the storage directory
            directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
            ensure_directory_exists(directory)

            # file path and names to store the resized images on disk
            final_place = os.path.join(directory, new_filename + '.jpg')
            final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
            try:
                generate_image_from_video_url(file.source_url, final_place)
            except Exception:
                return
            image = Image.open(final_place)
            _store_resized_versions(image, file, final_place, final_place_thumbnail, medium_width, thumbnail_width)
            session.commit()
            return

        # Images
        try:
            source_image_response = get_request(file.source_url)
        except Exception:
            # Best effort: if the image cannot be fetched there is nothing to resize
            return

        if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url:
            source_image_response.close()
            # Lemmy failed to retrieve the image but we might have better luck. Example source_url:
            # https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2F...
            # The un-proxied image url is the query parameter called 'url'
            parsed_url = urlparse(file.source_url)
            query_params = parse_qs(parsed_url.query)
            if 'url' in query_params:
                source_image_response = get_request(query_params['url'][0])
            else:
                source_image_response = None

        if source_image_response is None:
            return
        # Read what is needed, then close the response whatever the outcome
        try:
            if source_image_response.status_code != 200:
                return
            content_type = source_image_response.headers.get('content-type')
            if not content_type or not content_type.startswith('image'):
                return
            source_image = source_image_response.content
        finally:
            source_image_response.close()

        # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8'
        # Remove ;charset=whatever
        main_part = content_type.split(';')[0]
        if '/' in main_part:
            # Split the main part on the '/' character and take the second part
            file_ext = ('.' + main_part.split('/')[1]).strip()  # just to be sure
            if file_ext == '.jpeg':
                file_ext = '.jpg'
            elif file_ext == '.svg+xml':
                return  # no need to resize SVG images
        else:
            # No subtype in the header: fall back to the extension in the URL
            file_ext = os.path.splitext(file.source_url)[1]
            file_ext = file_ext.replace('%3f', '?')  # sometimes urls are not decoded properly
            if '?' in file_ext:
                file_ext = file_ext.split('?')[0]

        new_filename = gibberish(15)

        # set up the storage directory
        directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
        ensure_directory_exists(directory)

        # file path and names to store the resized images on disk
        final_place = os.path.join(directory, new_filename + file_ext)
        final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')

        # Load image data into Pillow
        Image.MAX_IMAGE_PIXELS = 89478485
        image = Image.open(BytesIO(source_image))
        image = ImageOps.exif_transpose(image)
        img_width = image.width

        _store_resized_versions(image, file, final_place, final_place_thumbnail, medium_width, thumbnail_width)
        session.commit()

        # Alert regarding fascist meme content
        if toxic_community and img_width < 2000:  # images > 2000px tend to be real photos instead of 4chan screenshots.
            try:
                image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
            except Exception:
                image_text = ''
            # chan posts usually contain the text 'Anonymous' and ' No.12345'
            if 'Anonymous' in image_text and ('No.' in image_text or 'N0' in image_text):
                post = session.query(Post).filter_by(image_id=file.id).first()
                if post is not None:  # the file may not be attached to any post
                    notification = Notification(title='Review this',
                                                user_id=1,
                                                author_id=post.user_id,
                                                url=url_for('activitypub.post_ap', post_id=post.id))
                    session.add(notification)
                    session.commit()
    finally:
        session.close()


def _store_resized_versions(image, file, final_place, final_place_thumbnail, medium_width, thumbnail_width):
    """Save the medium and thumbnail versions of ``image`` and record them on ``file``.

    Both size checks compare against the width the image had before any resizing.
    A falsy width skips that version entirely.
    """
    img_width = image.width

    # Resize the image to medium
    if medium_width:
        if img_width > medium_width:
            image.thumbnail((medium_width, medium_width))
        image.save(final_place)
        file.file_path = final_place
        file.width = image.width
        file.height = image.height

    # Resize the image to a thumbnail (webp)
    if thumbnail_width:
        if img_width > thumbnail_width:
            image.thumbnail((thumbnail_width, thumbnail_width))
        image.save(final_place_thumbnail, format="WebP", quality=93)
        file.thumbnail_path = final_place_thumbnail
        file.thumbnail_width = image.width
        file.thumbnail_height = image.height
2023-12-24 16:20:18 +13:00
2023-09-16 19:09:04 +12:00
def find_reply_parent(in_reply_to: str) -> Tuple[int, int, int]:
    """Resolve an ``inReplyTo`` ActivityPub id to local database ids.

    The text of ``in_reply_to`` is used as a hint: if it contains 'comment' only
    comments are searched, if it contains 'post' only posts are searched, and
    otherwise a post is tried first and a comment second.

    Returns ``(post_id, parent_comment_id, root_id)``. All three are ``None`` when
    nothing matches; ``parent_comment_id`` and ``root_id`` are ``None`` when the
    parent turned out to be a post.
    """
    nothing_found = (None, None, None)

    def ids_of_comment(comment):
        return comment.post_id, comment.id, comment.root_id

    def ids_of_post(found_post):
        return found_post.id, None, None

    if 'comment' in in_reply_to:
        comment = PostReply.get_by_ap_id(in_reply_to)
        return ids_of_comment(comment) if comment else nothing_found

    if 'post' in in_reply_to:
        found_post = Post.get_by_ap_id(in_reply_to)
        return ids_of_post(found_post) if found_post else nothing_found

    # No hint in the id: try a post first, then fall back to a comment
    found_post = Post.get_by_ap_id(in_reply_to)
    if found_post:
        return ids_of_post(found_post)
    comment = PostReply.get_by_ap_id(in_reply_to)
    return ids_of_comment(comment) if comment else nothing_found
def find_liked_object(ap_id) -> Union[Post, PostReply, None]:
    """Return the Post or, failing that, the PostReply with this ActivityPub id, else None."""
    liked = Post.get_by_ap_id(ap_id)
    if not liked:
        liked = PostReply.get_by_ap_id(ap_id)
    return liked if liked else None
2023-11-23 15:10:44 +13:00
2024-04-06 16:29:47 +13:00
def find_reported_object(ap_id) -> Union[User, Post, PostReply, None]:
    """Return whatever a report points at: a Post, then a PostReply, then an already-known User.

    The user lookup is done with create_if_not_found=False. Returns None when none of the
    three lookups finds anything.
    """
    reported = Post.get_by_ap_id(ap_id)
    if not reported:
        reported = PostReply.get_by_ap_id(ap_id)
    if not reported:
        reported = find_actor_or_create(ap_id, create_if_not_found=False)
    return reported if reported else None
2023-12-21 22:14:43 +13:00
def find_instance_id(server):
    """Return the id of the Instance row for ``server``, creating a sparse row if there is none.

    ``server`` is stripped and lower-cased before the lookup. When a new row is created,
    new_instance_profile() is called to fill in more details later.

    If inserting the new row raises IntegrityError, the failed transaction is rolled
    back and the id of the row that is now present is returned.
    """
    server = server.strip().lower()
    instance = Instance.query.filter_by(domain=server).first()
    if instance:
        return instance.id

    # Our instance does not know about {server} yet. Initially, create a sparse row in the 'instance' table and spawn a background
    # task to update the row with more details later
    new_instance = Instance(domain=server, software='unknown', created_at=utcnow(), trusted=server == 'piefed.social')
    try:
        db.session.add(new_instance)
        db.session.commit()
    except IntegrityError:
        # The session cannot be queried again until the failed transaction is rolled back,
        # and callers expect an id here, not an Instance object.
        db.session.rollback()
        return Instance.query.filter_by(domain=server).one().id

    # Spawn background task to fill in more details
    new_instance_profile(new_instance.id)

    return new_instance.id
2024-05-23 00:14:07 +01:00
def new_instance_profile(instance_id: int):
    """Run new_instance_profile_task for ``instance_id``.

    Does nothing for a falsy id. In debug mode the task function is called directly;
    otherwise it is queued with a random countdown of 1 to 10.
    """
    if not instance_id:
        return
    if current_app.debug:
        new_instance_profile_task(instance_id)
        return
    new_instance_profile_task.apply_async(args=(instance_id,), countdown=randint(1, 10))
2023-12-28 20:00:42 +13:00
@celery.task
def new_instance_profile_task(instance_id: int):
    """Fetch details about a remote instance and store them on its Instance row.

    Opens a task session, delegates the work to _fill_in_instance_profile() and always
    closes the session afterwards, including when the helper returns early or raises.
    """
    session = get_task_session()
    try:
        _fill_in_instance_profile(session, instance_id)
    finally:
        session.close()


def _fill_in_instance_profile(session, instance_id: int):
    """Populate inbox/outbox, admin roles and software/version for one instance using ``session``.

    Returns early (leaving the row as it is at that point) when the instance does not exist,
    when the first request to the instance fails, or when the nodeinfo lookup fails.
    """
    instance: Instance = session.query(Instance).get(instance_id)
    if instance is None:
        return
    try:
        instance_data = get_request(f"https://{instance.domain}", headers={'Accept': 'application/activity+json'})
    except Exception:
        return

    if instance_data.status_code == 200:
        try:
            instance_json = instance_data.json()
            instance_data.close()
        except Exception:
            instance_json = {}
        if 'type' in instance_json and instance_json['type'] == 'Application':
            instance.inbox = instance_json['inbox']
            instance.outbox = instance_json['outbox']
        else:   # it's pretty much always /inbox so just assume that it is for whatever this instance is running
            instance.inbox = f"https://{instance.domain}/inbox"
        instance.updated_at = utcnow()
        session.commit()

        # retrieve list of Admins from /api/v3/site, update InstanceRole
        try:
            response = get_request(f'https://{instance.domain}/api/v3/site')
        except Exception:
            response = None

        if response and response.status_code == 200:
            try:
                instance_data = response.json()
            except Exception:
                instance_data = None
            finally:
                response.close()

            if instance_data:
                if 'admins' in instance_data:
                    admin_profile_ids = []
                    for admin in instance_data['admins']:
                        admin_profile_ids.append(admin['person']['actor_id'].lower())
                        user = find_actor_or_create(admin['person']['actor_id'])
                        if user and not instance.user_is_admin(user.id):
                            new_instance_role = InstanceRole(instance_id=instance.id, user_id=user.id, role='admin')
                            session.add(new_instance_role)
                            session.commit()
                    # remove any InstanceRoles that are no longer part of instance-data['admins']
                    for instance_admin in InstanceRole.query.filter_by(instance_id=instance.id):
                        if instance_admin.user.profile_id() not in admin_profile_ids:
                            session.query(InstanceRole).filter(
                                InstanceRole.user_id == instance_admin.user.id,
                                InstanceRole.instance_id == instance.id,
                                InstanceRole.role == 'admin').delete()
                            session.commit()
    elif instance_data.status_code == 406 or instance_data.status_code == 404:  # Mastodon and PeerTube do 406, a.gup.pe does 404
        instance.inbox = f"https://{instance.domain}/inbox"
        instance.updated_at = utcnow()
        session.commit()

    headers = {'User-Agent': 'PieFed/1.0', 'Accept': 'application/activity+json'}
    try:
        nodeinfo = get_request(f"https://{instance.domain}/.well-known/nodeinfo", headers=headers)
        if nodeinfo.status_code == 200:
            nodeinfo_json = nodeinfo.json()
            for links in nodeinfo_json['links']:
                if isinstance(links, dict) and 'rel' in links and (
                        links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or    # most platforms except KBIN and Lemmy v0.19.4
                        links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or   # KBIN
                        links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'):     # Lemmy v0.19.4+ (no 2.0 back-compat provided here)
                    try:
                        time.sleep(0.1)
                        node = get_request(links['href'], headers=headers)
                        if node.status_code == 200:
                            node_json = node.json()
                            if 'software' in node_json:
                                instance.software = node_json['software']['name'].lower()
                                instance.version = node_json['software']['version']
                                instance.nodeinfo_href = links['href']
                                session.commit()
                                break  # most platforms (except Lemmy v0.19.4) that provide 2.1 also provide 2.0 - there's no need to check both
                    except Exception:
                        return
    except Exception:
        return
2023-12-21 22:14:43 +13:00
2023-11-24 22:52:42 +13:00
# alter the effect of upvotes based on their instance. Default to 1.0
@cache.memoize(timeout=50)
def instance_weight(domain):
    """Return the vote_weight of the Instance row for ``domain``, or 1.0 when there is no domain or no row."""
    if not domain:
        return 1.0
    known_instance = Instance.query.filter_by(domain=domain).first()
    if not known_instance:
        return 1.0
    return known_instance.vote_weight
2023-12-10 15:10:09 +13:00
def is_activitypub_request():
    """Return True when the current request's Accept header mentions an ActivityPub media type."""
    accept_header = request.headers.get('Accept', '')
    activitypub_types = ('application/ld+json', 'application/activity+json')
    return any(media_type in accept_header for media_type in activitypub_types)
2024-11-18 22:31:18 +00:00
def delete_post_or_comment(deletor, to_delete, store_ap_json, request_json):
    """Soft-delete a Post or PostReply on behalf of ``deletor`` and log the outcome.

    The deletion is allowed when ``deletor`` authored ``to_delete``, is an admin, or is a
    moderator / instance admin of its community. Otherwise only a failure is logged.
    Counters on the community, author and (for replies by non-bots) the parent post are
    decremented, cross-post links are removed, and a modlog entry is added when someone
    other than the author did the deleting.

    ``request_json`` is stored with the log entry only when ``store_ap_json`` is truthy.
    """
    activity_id = request_json['id']
    community = to_delete.community
    stored_json = request_json if store_ap_json else None

    # 'object' may be a plain URL string; `'summary' in <str>` is a substring test and
    # indexing the string with 'summary' would raise TypeError, so only look inside dicts.
    activity_object = request_json['object']
    reason = activity_object.get('summary', '') if isinstance(activity_object, dict) else ''

    permitted = (to_delete.user_id == deletor.id or deletor.is_admin() or
                 community.is_moderator(deletor) or community.is_instance_admin(deletor))
    if not permitted:
        log_incoming_ap(activity_id, APLOG_DELETE, APLOG_FAILURE, stored_json, 'Deletor did not have permisson')
        return

    if isinstance(to_delete, Post):
        to_delete.deleted = True
        to_delete.deleted_by = deletor.id
        community.post_count -= 1
        to_delete.author.post_count -= 1
        if to_delete.url and to_delete.cross_posts is not None:
            old_cross_posts = Post.query.filter(Post.id.in_(to_delete.cross_posts)).all()
            to_delete.cross_posts.clear()
            for ocp in old_cross_posts:
                if ocp.cross_posts is not None and to_delete.id in ocp.cross_posts:
                    ocp.cross_posts.remove(to_delete.id)
        db.session.commit()
        if to_delete.author.id != deletor.id:
            add_to_modlog_activitypub('delete_post', deletor, community_id=community.id,
                                      link_text=shorten_string(to_delete.title), link=f'post/{to_delete.id}',
                                      reason=reason)
    elif isinstance(to_delete, PostReply):
        to_delete.deleted = True
        to_delete.deleted_by = deletor.id
        to_delete.author.post_reply_count -= 1
        community.post_reply_count -= 1
        if not to_delete.author.bot:
            to_delete.post.reply_count -= 1
        db.session.commit()
        if to_delete.author.id != deletor.id:
            add_to_modlog_activitypub('delete_post_reply', deletor, community_id=community.id,
                                      link_text=f'comment on {shorten_string(to_delete.post.title)}',
                                      link=f'post/{to_delete.post.id}#comment_{to_delete.id}',
                                      reason=reason)
    log_incoming_ap(activity_id, APLOG_DELETE, APLOG_SUCCESS, stored_json)
2023-12-26 12:36:20 +13:00
2024-11-23 01:29:04 +00:00
def restore_post_or_comment(restorer, to_restore, store_ap_json, request_json):
    """Undo the soft-deletion of a Post or PostReply on behalf of ``restorer`` and log the outcome.

    The restore is allowed when ``restorer`` authored ``to_restore``, is an admin, or is a
    moderator / instance admin of its community. Otherwise only a failure is logged.
    Counters are re-incremented, a restored post with a url is re-linked to other
    non-deleted posts of the same url from the last 6 days, and a modlog entry is added
    when someone other than the author did the restoring.

    ``request_json`` is stored with the log entry only when ``store_ap_json`` is truthy.
    """
    activity_id = request_json['id']
    community = to_restore.community
    stored_json = request_json if store_ap_json else None

    # Only look for a reason inside a dict; on a string `in` would be a substring test.
    activity_object = request_json['object']
    reason = activity_object.get('summary', '') if isinstance(activity_object, dict) else ''

    permitted = (to_restore.user_id == restorer.id or restorer.is_admin() or
                 community.is_moderator(restorer) or community.is_instance_admin(restorer))
    if not permitted:
        log_incoming_ap(activity_id, APLOG_UNDO_DELETE, APLOG_FAILURE, stored_json, 'Restorer did not have permisson')
        return

    if isinstance(to_restore, Post):
        to_restore.deleted = False
        to_restore.deleted_by = None
        community.post_count += 1
        to_restore.author.post_count += 1
        if to_restore.url:
            new_cross_posts = Post.query.filter(Post.id != to_restore.id, Post.url == to_restore.url,
                                                Post.deleted == False,  # noqa: E712 (SQL expression)
                                                Post.posted_at > utcnow() - timedelta(days=6)).all()
            for ncp in new_cross_posts:
                if ncp.cross_posts is None:
                    ncp.cross_posts = [to_restore.id]
                else:
                    ncp.cross_posts.append(to_restore.id)
                if to_restore.cross_posts is None:
                    to_restore.cross_posts = [ncp.id]
                else:
                    to_restore.cross_posts.append(ncp.id)
        db.session.commit()
        if to_restore.author.id != restorer.id:
            add_to_modlog_activitypub('restore_post', restorer, community_id=community.id,
                                      link_text=shorten_string(to_restore.title), link=f'post/{to_restore.id}',
                                      reason=reason)
    elif isinstance(to_restore, PostReply):
        to_restore.deleted = False
        to_restore.deleted_by = None
        if not to_restore.author.bot:
            to_restore.post.reply_count += 1
        to_restore.author.post_reply_count += 1
        # Deleting a reply decrements the community's reply counter, so restoring must add it back
        community.post_reply_count += 1
        db.session.commit()
        if to_restore.author.id != restorer.id:
            add_to_modlog_activitypub('restore_post_reply', restorer, community_id=community.id,
                                      link_text=f'comment on {shorten_string(to_restore.post.title)}',
                                      link=f'post/{to_restore.post_id}#comment_{to_restore.id}',
                                      reason=reason)
    log_incoming_ap(activity_id, APLOG_UNDO_DELETE, APLOG_SUCCESS, stored_json)
2024-08-16 15:04:54 +00:00
2024-11-20 19:48:38 +00:00
def site_ban_remove_data(blocker_id, blocked):
    """Soft-delete everything a site-banned user posted and delete their images from disk.

    All non-deleted replies and posts by ``blocked`` are marked deleted by ``blocker_id``,
    the affected post/community counters are decremented, cross-post links are removed and
    the user's own post and reply counters are reset to 0. Files attached to their posts,
    plus their avatar and cover, are deleted from disk and have their source_url blanked.
    """
    replies = PostReply.query.filter_by(user_id=blocked.id, deleted=False)
    for reply in replies:
        reply.deleted = True
        reply.deleted_by = blocker_id
        if not blocked.bot:
            reply.post.reply_count -= 1
        reply.community.post_reply_count -= 1
    # The per-user reply counter is `post_reply_count` (see the delete/restore helpers);
    # assigning to `reply_count` left the real counter untouched.
    blocked.post_reply_count = 0
    db.session.commit()

    posts = Post.query.filter_by(user_id=blocked.id, deleted=False)
    for post in posts:
        post.deleted = True
        post.deleted_by = blocker_id
        post.community.post_count -= 1
        if post.url and post.cross_posts is not None:
            old_cross_posts = Post.query.filter(Post.id.in_(post.cross_posts)).all()
            post.cross_posts.clear()
            for ocp in old_cross_posts:
                if ocp.cross_posts is not None and post.id in ocp.cross_posts:
                    ocp.cross_posts.remove(post.id)
    blocked.post_count = 0
    db.session.commit()

    # Delete all their images to save moderators from having to see disgusting stuff.
    # Images attached to posts can't be restored, but site ban reversals don't have a 'removeData' field anyway.
    files = File.query.join(Post).filter(Post.user_id == blocked.id).all()
    for file in files:
        file.delete_from_disk()
        file.source_url = ''
    if blocked.avatar_id:
        blocked.avatar.delete_from_disk()
        blocked.avatar.source_url = ''
    if blocked.cover_id:
        blocked.cover.delete_from_disk()
        blocked.cover.source_url = ''

    db.session.commit()
2024-08-16 15:04:54 +00:00
2024-05-22 22:30:51 +01:00
def remove_data_from_banned_user(deletor_ap_id, user_ap_id, target):
    """Run remove_data_from_banned_user_task: directly in debug mode, via .delay() otherwise."""
    task_args = (deletor_ap_id, user_ap_id, target)
    if not current_app.debug:
        remove_data_from_banned_user_task.delay(*task_args)
        return
    remove_data_from_banned_user_task(*task_args)
@celery.task
def remove_data_from_banned_user_task(deletor_ap_id, user_ap_id, target):
    """Soft-delete the posts and replies of a banned user.

    ``deletor_ap_id`` and ``user_ap_id`` are resolved to already-known actors (nothing is
    created). Two cases are handled:

    * site ban: the deletor is an admin of their own instance, ``target`` is that
      instance's root URL and the banned user belongs to the same instance. All of the
      user's content is removed.
    * community ban: ``target`` is a community's ap_profile_id and the deletor moderates
      it or is an instance admin for it. Only content in that community is removed.

    Anything else returns without changes.
    """
    deletor = find_actor_or_create(deletor_ap_id, create_if_not_found=False)
    user = find_actor_or_create(user_ap_id, create_if_not_found=False)
    community = Community.query.filter_by(ap_profile_id=target).first()
    if not deletor or not user:
        return

    # site bans by admins
    if deletor.instance.user_is_admin(deletor.id) and target == f"https://{deletor.instance.domain}/" and deletor.instance_id == user.instance_id:
        # Skip rows that are already deleted so their counters are not decremented a second time
        post_replies = PostReply.query.filter_by(user_id=user.id, deleted=False)
        posts = Post.query.filter_by(user_id=user.id, deleted=False)
    # community bans by mods or admins
    elif community and (community.is_moderator(deletor) or community.is_instance_admin(deletor)):
        post_replies = PostReply.query.filter_by(user_id=user.id, community_id=community.id, deleted=False)
        posts = Post.query.filter_by(user_id=user.id, community_id=community.id, deleted=False)
    else:
        return

    for post_reply in post_replies:
        if not user.bot:
            post_reply.post.reply_count -= 1
        post_reply.deleted = True
        post_reply.deleted_by = deletor.id
    db.session.commit()

    for post in posts:
        if post.cross_posts:
            old_cross_posts = Post.query.filter(Post.id.in_(post.cross_posts)).all()
            for ocp in old_cross_posts:
                # list.remove() raises ValueError when the id is absent, so check first
                if ocp.cross_posts is not None and post.id in ocp.cross_posts:
                    ocp.cross_posts.remove(post.id)
        post.delete_dependencies()
        post.deleted = True
        post.deleted_by = deletor.id
        post.community.post_count -= 1
    db.session.commit()
2024-11-24 21:01:34 +00:00
def community_ban_remove_data(blocker_id, community_id, blocked):
    """Soft-delete what a banned user posted in one community and delete the attached images.

    Non-deleted replies and posts by ``blocked`` in ``community_id`` are marked deleted by
    ``blocker_id``; post, community and user counters are decremented per item and
    cross-post links are removed. Files joined to the user's posts in that community are
    deleted from disk and have their source_url blanked.
    """
    def unlink_cross_posts(removed_post):
        linked_posts = Post.query.filter(Post.id.in_(removed_post.cross_posts)).all()
        removed_post.cross_posts.clear()
        for linked in linked_posts:
            if linked.cross_posts is not None and removed_post.id in linked.cross_posts:
                linked.cross_posts.remove(removed_post.id)

    banned_replies = PostReply.query.filter_by(user_id=blocked.id, deleted=False, community_id=community_id)
    for banned_reply in banned_replies:
        banned_reply.deleted = True
        banned_reply.deleted_by = blocker_id
        if not blocked.bot:
            banned_reply.post.reply_count -= 1
        banned_reply.community.post_reply_count -= 1
        blocked.post_reply_count -= 1
    db.session.commit()

    banned_posts = Post.query.filter_by(user_id=blocked.id, deleted=False, community_id=community_id)
    for banned_post in banned_posts:
        banned_post.deleted = True
        banned_post.deleted_by = blocker_id
        banned_post.community.post_count -= 1
        if banned_post.url and banned_post.cross_posts is not None:
            unlink_cross_posts(banned_post)
        blocked.post_count -= 1
    db.session.commit()

    # Delete attached images to save moderators from having to see disgusting stuff.
    attached_files = File.query.join(Post).filter(Post.user_id == blocked.id, Post.community_id == community_id).all()
    for attached in attached_files:
        attached.delete_from_disk()
        attached.source_url = ''
    db.session.commit()
2024-08-23 08:47:19 +00:00
2024-12-02 14:05:54 +01:00
def ban_user(blocker, blocked, community, request_json):
    """Record a community ban of ``blocked`` by ``blocker`` and apply its side effects.

    Does nothing when a CommunityBan for this user and community already exists. Otherwise
    a ban row is created (with the reason and, if it lies in the future, the expiry taken
    from ``request_json['object']``), pending join requests are removed, any membership
    record is flagged as banned, a local user is notified and loses their community
    notification subscription, the relevant memoized lookups are cleared and a modlog
    entry is written.
    """
    existing = CommunityBan.query.filter_by(community_id=community.id, user_id=blocked.id).first()
    if existing:
        return

    block_activity = request_json['object']
    new_ban = CommunityBan(community_id=community.id, user_id=blocked.id, banned_by=blocker.id)
    if 'summary' in block_activity:
        new_ban.reason = block_activity['summary']
        reason = block_activity['summary']
    else:
        reason = ''

    # The expiry is read from the inner activity, so its presence must be tested there too.
    # Testing the outer request_json either ignored the expiry or raised KeyError.
    now = datetime.now(timezone.utc)
    for expiry_key in ('expires', 'endTime'):
        if expiry_key in block_activity:
            ban_until = datetime.fromisoformat(block_activity[expiry_key])
            if ban_until > now:
                new_ban.ban_until = ban_until
                break
    db.session.add(new_ban)
    db.session.commit()

    db.session.query(CommunityJoinRequest).filter(CommunityJoinRequest.community_id == community.id, CommunityJoinRequest.user_id == blocked.id).delete()
    community_membership_record = CommunityMember.query.filter_by(community_id=community.id, user_id=blocked.id).first()
    if community_membership_record:
        community_membership_record.is_banned = True

    if blocked.is_local():
        # Notify banned person
        notify = Notification(title=shorten_string('You have been banned from ' + community.title),
                              url=f'/notifications', user_id=blocked.id,
                              author_id=blocker.id)
        db.session.add(notify)
        if not current_app.debug:                   # user.unread_notifications += 1 hangs app if 'user' is the same person
            blocked.unread_notifications += 1       # who pressed 'Re-submit this activity'.
        # Remove their notification subscription, if any
        db.session.query(NotificationSubscription).filter(NotificationSubscription.entity_id == community.id,
                                                          NotificationSubscription.user_id == blocked.id,
                                                          NotificationSubscription.type == NOTIF_COMMUNITY).delete()
    db.session.commit()

    cache.delete_memoized(communities_banned_from, blocked.id)
    cache.delete_memoized(joined_communities, blocked.id)
    cache.delete_memoized(moderating_communities, blocked.id)

    add_to_modlog_activitypub('ban_user', blocker, community_id=community.id, link_text=blocked.display_name(), link=f'u/{blocked.link()}', reason=reason)
2024-08-23 08:47:19 +00:00
2024-08-23 14:05:42 +00:00
2024-12-02 14:05:54 +01:00
def unban_user(blocker, blocked, community, request_json):
    """Lift a community ban on ``blocked``.

    Deletes the CommunityBan rows for this user and community, clears the banned flag on
    any membership record, notifies a local user, clears the relevant memoized lookups
    and writes a modlog entry using the 'summary' of ``request_json['object']`` (or an
    empty string) as the reason.
    """
    ban_filter = (CommunityBan.community_id == community.id, CommunityBan.user_id == blocked.id)
    db.session.query(CommunityBan).filter(*ban_filter).delete()

    membership = CommunityMember.query.filter_by(community_id=community.id, user_id=blocked.id).first()
    if membership:
        membership.is_banned = False

    if 'summary' in request_json['object']:
        reason = request_json['object']['summary']
    else:
        reason = ''

    if blocked.is_local():
        # Notify unbanned person
        notice_title = shorten_string('You have been unbanned from ' + community.title)
        notify = Notification(title=notice_title, url=f'/notifications', user_id=blocked.id, author_id=blocker.id)
        db.session.add(notify)
        if not current_app.debug:                   # user.unread_notifications += 1 hangs app if 'user' is the same person
            blocked.unread_notifications += 1       # who pressed 'Re-submit this activity'.

    db.session.commit()

    for memoized_lookup in (communities_banned_from, joined_communities, moderating_communities):
        cache.delete_memoized(memoized_lookup, blocked.id)

    add_to_modlog_activitypub('unban_user', blocker, community_id=community.id,
                              link_text=blocked.display_name(), link=f'u/{blocked.link()}', reason=reason)
2024-08-23 14:05:42 +00:00
2024-11-18 22:05:25 +00:00
def create_post_reply(store_ap_json, community: Community, in_reply_to, request_json: dict, user: User, announce_id=None) -> Union[PostReply, None]:
    """Create a PostReply from an incoming ActivityPub activity.

    Returns the new PostReply, or None (after logging a failure) when the community is
    local-only, the parent post/comment cannot be found, or PostReply.new() raises.
    ``request_json`` is stored with log entries only when ``store_ap_json`` is truthy.

    Note: when the incoming 'content' does not start with '<p>' or '<blockquote>' it is
    wrapped in '<p>...</p>' in place inside ``request_json``.
    """
    activity_id = request_json['id']
    stored_json = request_json if store_ap_json else None

    if community.local_only:
        log_incoming_ap(activity_id, APLOG_CREATE, APLOG_FAILURE, stored_json, 'Community is local only, reply discarded')
        return None

    post_id, parent_comment_id, root_id = find_reply_parent(in_reply_to)
    if not (post_id or parent_comment_id or root_id):
        log_incoming_ap(activity_id, APLOG_CREATE, APLOG_FAILURE, stored_json, 'Unable to find parent post/comment')
        return None

    # set depth to +1 of the parent depth
    parent_comment = PostReply.query.get(parent_comment_id) if parent_comment_id else None
    if post_id is None:
        log_incoming_ap(activity_id, APLOG_CREATE, APLOG_FAILURE, stored_json, 'Could not find parent post')
        return None
    post = Post.query.get(post_id)

    activity_object = request_json['object']
    body = body_html = ''
    # A 'content' of None is treated like a missing 'content' instead of crashing on .startswith()
    if 'content' in activity_object and activity_object['content'] is not None:   # Kbin, Mastodon, etc provide their posts as html
        if not activity_object['content'].startswith(('<p>', '<blockquote>')):
            activity_object['content'] = '<p>' + activity_object['content'] + '</p>'
        body_html = allowlist_html(activity_object['content'])
        source = activity_object['source'] if 'source' in activity_object else None
        if isinstance(source, dict) and 'mediaType' in source and source['mediaType'] == 'text/markdown':
            body = source['content']
            body_html = markdown_to_html(body)          # prefer Markdown if provided, overwrite version obtained from HTML
        else:
            body = html_to_text(body_html)

    # Language - Lemmy uses 'language' while Mastodon uses 'contentMap'
    language_id = None
    if 'language' in activity_object and isinstance(activity_object['language'], dict):
        language = find_language_or_create(activity_object['language']['identifier'],
                                           activity_object['language']['name'])
        language_id = language.id
    elif 'contentMap' in activity_object and isinstance(activity_object['contentMap'], dict):
        # An empty contentMap has no first key; next(iter({})) would raise StopIteration
        first_language_code = next(iter(activity_object['contentMap']), None)
        if first_language_code is not None:
            language = find_language(first_language_code)
            language_id = language.id if language else None

    try:
        return PostReply.new(user, post, parent_comment, notify_author=True, body=body, body_html=body_html,
                             language_id=language_id, request_json=request_json, announce_id=announce_id)
    except Exception as ex:
        log_incoming_ap(activity_id, APLOG_CREATE, APLOG_FAILURE, stored_json, str(ex))
        return None
2023-12-30 13:23:12 +13:00
2024-11-18 22:05:25 +00:00
def create_post(store_ap_json, community: Community, request_json: dict, user: User, announce_id=None) -> Union[Post, None]:
    """Create a Post from an incoming activity via Post.new().

    Returns the new Post, or None after logging a failure when the community is local-only
    or Post.new() raises. ``request_json`` is stored with the log entry only when
    ``store_ap_json`` is truthy.
    """
    activity_id = request_json['id']
    if community.local_only:
        log_incoming_ap(activity_id, APLOG_CREATE, APLOG_FAILURE, request_json if store_ap_json else None,
                        'Community is local only, post discarded')
        return None
    try:
        new_post = Post.new(user, community, request_json, announce_id)
    except Exception as ex:
        log_incoming_ap(activity_id, APLOG_CREATE, APLOG_FAILURE, request_json if store_ap_json else None, str(ex))
        return None
    return new_post
2024-01-07 12:47:06 +13:00
2023-12-30 13:23:12 +13:00
2024-01-07 12:49:25 +13:00
def notify_about_post(post: Post):
    """Create a Notification for everyone subscribed to the post's author, community or topic.

    The post's own author is skipped. Each recipient's unread_notifications is incremented
    and the session is committed once per recipient.
    """
    # todo: eventually this function could trigger a lot of DB activity. This function will need to be a celery task.

    # Send notifications based on subscriptions
    author_subscribers = notification_subscribers(post.user_id, NOTIF_USER)
    community_subscribers = notification_subscribers(post.community_id, NOTIF_COMMUNITY)
    topic_subscribers = notification_subscribers(post.community.topic_id, NOTIF_TOPIC)
    # A set holds each recipient id once, so nobody is notified twice
    recipients = set(author_subscribers + community_subscribers + topic_subscribers)

    for recipient_id in recipients:
        if recipient_id == post.user_id:
            continue
        db.session.add(Notification(title=shorten_string(post.title, 50), url=f"/post/{post.id}",
                                    user_id=recipient_id, author_id=post.user_id))
        recipient = User.query.get(recipient_id)
        recipient.unread_notifications += 1
        db.session.commit()
2024-01-07 12:49:25 +13:00
2024-04-29 21:43:37 +12:00
def notify_about_post_reply(parent_reply: Union[PostReply, None], new_reply: PostReply):
    """Notify subscribers about a new reply.

    For a top-level comment (``parent_reply`` is None) subscribers of the post are
    notified; otherwise subscribers of the parent comment are. The author of
    ``new_reply`` is never notified. Replies deeper than THREAD_CUTOFF_DEPTH link to the
    parent comment's own page rather than to the post page.
    """
    def deliver(recipient_id, title, url):
        db.session.add(Notification(title=title, url=url, user_id=recipient_id, author_id=new_reply.user_id))
        recipient = User.query.get(recipient_id)
        recipient.unread_notifications += 1
        db.session.commit()

    if parent_reply is None:  # This happens when a new_reply is a top-level comment, not a comment on a comment
        for recipient_id in notification_subscribers(new_reply.post.id, NOTIF_POST):
            if recipient_id == new_reply.user_id:
                continue
            deliver(recipient_id,
                    shorten_string(_('Reply to %(post_title)s', post_title=new_reply.post.title), 50),
                    f"/post/{new_reply.post.id}#comment_{new_reply.id}")
        return

    # Send notifications based on subscriptions
    for recipient_id in set(notification_subscribers(parent_reply.id, NOTIF_REPLY)):
        if recipient_id == new_reply.user_id:
            continue
        if new_reply.depth <= THREAD_CUTOFF_DEPTH:
            link = f"/post/{parent_reply.post.id}#comment_{new_reply.id}"
        else:
            link = f"/post/{parent_reply.post.id}/comment/{parent_reply.id}#comment_{new_reply.id}"
        deliver(recipient_id,
                shorten_string(_('Reply to comment on %(post_title)s', post_title=parent_reply.post.title), 50),
                link)
2023-12-30 13:23:12 +13:00
def update_post_reply_from_activity(reply: PostReply, request_json: dict):
    """Apply the content of an incoming ActivityPub update to an existing reply.

    :param reply: the reply to modify in place.
    :param request_json: the activity; ``request_json['object']`` is the updated object.

    The HTML body comes from ``object.content`` (wrapped in ``<p>`` when it does not already
    start with ``<p>`` or ``<blockquote>``). If ``object.source`` carries Markdown, that is
    preferred and the HTML is regenerated from it. The language is updated when
    ``object.language`` is a dict. ``edited_at`` is always refreshed and the session committed.
    """
    ap_object = request_json['object']

    # Kbin, Mastodon, etc provide their posts as html.
    # A missing or null content leaves the existing body untouched instead of raising.
    content = ap_object.get('content')
    if content is not None:
        if not content.startswith(('<p>', '<blockquote>')):
            content = '<p>' + content + '</p>'
            ap_object['content'] = content      # the wrapped form is written back, as before
        reply.body_html = allowlist_html(content)

        source = ap_object.get('source')
        if isinstance(source, dict) and source.get('mediaType') == 'text/markdown' and 'content' in source:
            reply.body = source['content']
            reply.body_html = markdown_to_html(reply.body)  # prefer Markdown if provided, overwrite version obtained from HTML
        else:
            reply.body = html_to_text(reply.body_html)

    # Language
    language_json = ap_object.get('language')
    if isinstance(language_json, dict):
        language = find_language_or_create(language_json['identifier'], language_json['name'])
        reply.language_id = language.id

    reply.edited_at = utcnow()
    db.session.commit()
def update_post_from_activity(post: Post, request_json: dict):
    """Apply an incoming ActivityPub update to an existing post.

    :param post: the post to modify in place.
    :param request_json: the activity; ``request_json['object']`` is the updated object.

    Rebuilds, in order: body, title (plus microblog / NSFW / NSFL flags), language, hashtags,
    ``comments_enabled`` and ``edited_at``, then the link, thumbnail, domain and cross-post
    bookkeeping. Always returns None. Two paths return early:
      * object type 'Video': returns before the link handling, without committing;
      * the new url's domain is banned: commits what was changed so far and keeps the old url.
    """
    ap_object = request_json['object']

    # redo body without checking if it's changed
    content = ap_object.get('content')
    if content is not None:
        media_type = ap_object.get('mediaType')
        if media_type == 'text/html':
            post.body_html = allowlist_html(content)
            source = ap_object.get('source')
            # .get() so that a source without mediaType falls back to the HTML instead of raising
            if isinstance(source, dict) and source.get('mediaType') == 'text/markdown':
                post.body = source['content']
                post.body_html = markdown_to_html(post.body)  # prefer Markdown if provided, overwrite version obtained from HTML
            else:
                post.body = html_to_text(post.body_html)
        elif media_type == 'text/markdown':
            post.body = content
            post.body_html = markdown_to_html(post.body)
        else:
            if not content.startswith(('<p>', '<blockquote>')):
                content = '<p>' + content + '</p>'
                ap_object['content'] = content      # the wrapped form is written back, as before
            post.body_html = allowlist_html(content)
            post.body = html_to_text(post.body_html)

    # title
    old_title = post.title
    if 'name' in ap_object:
        new_title = ap_object['name']
        post.microblog = False
    else:
        autogenerated_title = microblog_content_to_title(post.body_html)
        if len(autogenerated_title) < 20:
            new_title = '[Microblog] ' + autogenerated_title.strip()
        else:
            new_title = autogenerated_title.strip()
        post.microblog = True

    if old_title != new_title:
        post.title = new_title
        upper_title = new_title.upper()
        if '[NSFL]' in upper_title or '(NSFL)' in upper_title:
            post.nsfl = True
        if '[NSFW]' in upper_title or '(NSFW)' in upper_title:
            post.nsfw = True
    # Explicit flags on the object take precedence over what the title implied
    if 'sensitive' in ap_object:
        post.nsfw = ap_object['sensitive']
    if 'nsfl' in ap_object:
        post.nsfl = ap_object['nsfl']

    # Language
    old_language_id = post.language_id
    new_language = None
    language_json = ap_object.get('language')
    content_map = ap_object.get('contentMap')
    if isinstance(language_json, dict):
        new_language = find_language_or_create(language_json['identifier'], language_json['name'])
    elif isinstance(content_map, dict) and content_map:     # an empty map has no first key to read
        new_language = find_language(next(iter(content_map)))
    if new_language and (new_language.id != old_language_id):
        post.language_id = new_language.id

    # Tags
    tags = ap_object.get('tag')
    if isinstance(tags, list):
        db.session.execute(text('DELETE FROM "post_tag" WHERE post_id = :post_id'), {'post_id': post.id})
        for json_tag in tags:
            if json_tag['type'] == 'Hashtag':
                # Lemmy adds the community slug as a hashtag on every post in the community, which we want to ignore
                if json_tag['name'][1:].lower() != post.community.name.lower():
                    hashtag = find_hashtag_or_create(json_tag['name'])
                    if hashtag:
                        post.tags.append(hashtag)

    post.comments_enabled = ap_object['commentsEnabled'] if 'commentsEnabled' in ap_object else True
    post.edited_at = utcnow()

    if ap_object['type'] == 'Video':
        # return now for PeerTube, otherwise rest of this function breaks the post
        # consider querying the Likes endpoint (that mostly seems to be what Updates are about)
        return

    # Links
    old_url = post.url
    new_url = None
    new_domain = None
    source_attachment = None        # the attachment new_url was taken from; also supplies the alt text
    attachment = ap_object.get('attachment')
    if isinstance(attachment, list) and len(attachment) > 0 and 'type' in attachment[0]:
        source_attachment = attachment[0]
        attachment_type = source_attachment['type']
        if attachment_type == 'Link':
            new_url = source_attachment['href']     # Lemmy < 0.19.4
        elif attachment_type == 'Document':
            new_url = source_attachment['url']      # Mastodon
        elif attachment_type == 'Image':
            new_url = source_attachment['url']      # PixelFed / PieFed / Lemmy >= 0.19.4
    elif isinstance(attachment, dict):              # Mastodon / a.gup.pe
        source_attachment = attachment
        new_url = attachment['url']

    if new_url:
        new_url = remove_tracking_from_link(new_url)
        new_domain = domain_from_url(new_url)
        if new_domain.banned:
            db.session.commit()
            return                                  # reject change to url if new domain is banned

    old_db_entry_to_delete = None
    if old_url != new_url:
        if post.image:
            post.image.delete_from_disk()
            old_db_entry_to_delete = post.image_id
        if new_url:
            post.url = new_url
            image = None
            if is_image_url(new_url):
                post.type = POST_TYPE_IMAGE
                image = File(source_url=new_url)
                # Read the alt text from whichever attachment supplied the url. Indexing
                # attachment[0] here raised KeyError when the attachment was a single dict.
                alt_text = source_attachment.get('name')
                if alt_text is not None:
                    image.alt_text = alt_text
            elif is_video_url(new_url):
                post.type = POST_TYPE_VIDEO
                image = File(source_url=new_url)
            else:
                if 'image' in ap_object and 'url' in ap_object['image']:
                    image = File(source_url=ap_object['image']['url'])
                else:
                    # Let's see if we can do better than the source instance did!
                    tn_url = new_url
                    if tn_url[:32] == 'https://www.youtube.com/watch?v=':
                        tn_url = 'https://youtu.be/' + tn_url[32:43]  # better chance of thumbnail from youtu.be than youtube.com
                    opengraph = opengraph_parse(tn_url)
                    if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
                        filename = opengraph.get('og:image') or opengraph.get('og:image:url')
                        if not filename.startswith('/'):
                            image = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
                if is_video_hosting_site(new_url):
                    post.type = POST_TYPE_VIDEO
                else:
                    post.type = POST_TYPE_LINK

            if image:
                db.session.add(image)
                db.session.commit()     # commit first so that image.id is populated
                post.image = image
                make_image_sizes(image.id, 170, 512, 'posts')  # the 512 sized image is for masonry view
            else:
                old_db_entry_to_delete = None

            # url domain
            old_domain = domain_from_url(old_url) if old_url else None
            if old_domain != new_domain:
                # notify about links to banned websites.
                already_notified = set()  # often admins and mods are the same people - avoid notifying them twice
                if new_domain.notify_mods:
                    for community_member in post.community.moderators():
                        notify = Notification(title='Suspicious content', url=post.ap_id,
                                              user_id=community_member.user_id,
                                              author_id=1)
                        db.session.add(notify)
                        already_notified.add(community_member.user_id)
                if new_domain.notify_admins:
                    for admin in Site.admins():
                        if admin.id not in already_notified:
                            notify = Notification(title='Suspicious content',
                                                  url=post.ap_id, user_id=admin.id,
                                                  author_id=1)
                            db.session.add(notify)
                new_domain.post_count += 1
                post.domain = new_domain

            # Fix-up cross posts (Posts which link to the same url as other posts)
            if post.cross_posts is not None:
                old_cross_posts = Post.query.filter(Post.id.in_(post.cross_posts)).all()
                post.cross_posts.clear()
                for ocp in old_cross_posts:
                    if ocp.cross_posts is not None and post.id in ocp.cross_posts:
                        ocp.cross_posts.remove(post.id)

            new_cross_posts = Post.query.filter(Post.id != post.id, Post.url == new_url, Post.deleted == False,
                                                Post.posted_at > utcnow() - timedelta(days=6)).all()
            for ncp in new_cross_posts:
                if ncp.cross_posts is None:
                    ncp.cross_posts = [post.id]
                else:
                    ncp.cross_posts.append(post.id)
                if post.cross_posts is None:
                    post.cross_posts = [ncp.id]
                else:
                    post.cross_posts.append(ncp.id)
        else:
            # The link was removed: the post becomes a plain article
            post.type = POST_TYPE_ARTICLE
            post.url = ''
            if post.cross_posts is not None:  # unlikely, but not impossible
                old_cross_posts = Post.query.filter(Post.id.in_(post.cross_posts)).all()
                post.cross_posts.clear()
                for ocp in old_cross_posts:
                    if ocp.cross_posts is not None and post.id in ocp.cross_posts:
                        ocp.cross_posts.remove(post.id)

    db.session.commit()
    # The old File row is deleted only after the post no longer references it
    if old_db_entry_to_delete:
        File.query.filter_by(id=old_db_entry_to_delete).delete()
        db.session.commit()
2023-12-30 13:23:12 +13:00
def undo_downvote(activity_log, comment, post, target_ap_id, user):
    """Reverse a downvote previously cast by a remote ``user``.

    ``target_ap_id`` decides what is looked up: a '/comment/' segment replaces ``comment`` and
    a '/post/' segment replaces ``post`` with the row having that ap_id. For a non-local user
    the stored vote, if any, is deleted and its effect removed from the author's reputation and
    from the target's score and down_votes; ``activity_log.result`` is then 'success'.
    A missing user or a local user is recorded on ``activity_log`` instead.
    Nothing is committed here. Returns ``post``.
    """
    if '/comment/' in target_ap_id:
        comment = PostReply.query.filter_by(ap_id=target_ap_id).first()
    if '/post/' in target_ap_id:
        post = Post.query.filter_by(ap_id=target_ap_id).first()

    if (user and not user.is_local()) and post:
        post_vote = PostVote.query.filter_by(user_id=user.id, post_id=post.id).first()
        if post_vote:
            effect = post_vote.effect
            post.author.reputation -= effect
            post.down_votes -= 1
            post.score -= effect
            db.session.delete(post_vote)
            activity_log.result = 'success'

    if (user and not user.is_local()) and comment:
        reply_vote = PostReplyVote.query.filter_by(user_id=user.id, post_reply_id=comment.id).first()
        if reply_vote:
            effect = reply_vote.effect
            comment.author.reputation -= effect
            comment.down_votes -= 1
            comment.score -= effect
            db.session.delete(reply_vote)
            activity_log.result = 'success'

    if user is None:
        activity_log.exception_message = 'Blocked or unfound user'
    if user and user.is_local():
        activity_log.exception_message = 'Activity about local content which is already present'
        activity_log.result = 'ignored'

    return post
2024-11-23 01:36:31 +00:00
def undo_vote(comment, post, target_ap_id, user):
    """Remove ``user``'s vote on the object identified by ``target_ap_id``.

    The object is resolved with find_liked_object(). For a Post or PostReply the stored vote,
    if there is one, is deleted, its effect is taken back from the author's reputation and the
    object's score, the matching up/down counter is decremented, and the session is committed.

    Returns the Post or PostReply that was voted on, or None when the target is neither.
    """
    voted_on = find_liked_object(target_ap_id)

    if isinstance(voted_on, Post):
        vote = PostVote.query.filter_by(user_id=user.id, post_id=voted_on.id).first()
    elif isinstance(voted_on, PostReply):
        vote = PostReplyVote.query.filter_by(user_id=user.id, post_reply_id=voted_on.id).first()
    else:
        return None

    if vote:
        voted_on.author.reputation -= vote.effect
        # Lemmy sends 'like' for upvote and 'dislike' for down votes. When it undoes an upvote it
        # sends an 'Undo Like', and when it undoes a downvote it also sends an 'Undo Like' - not
        # 'Undo Dislike' - so the sign of the stored effect decides which counter to reduce.
        if vote.effect < 0:
            voted_on.down_votes -= 1
        else:
            voted_on.up_votes -= 1
        voted_on.score -= vote.effect
        db.session.delete(vote)
        db.session.commit()

    return voted_on
2023-12-30 13:23:12 +13:00
2024-11-19 12:02:15 +00:00
def process_report(user, reported, request_json):
    """Record a federated report made by ``user`` against ``reported``.

    ``request_json['summary']`` is split into ``reasons`` (its first 15 characters) and
    ``description`` (whatever follows). Users, posts and comments are handled; anything whose
    ``reports`` counter is -1 is skipped. A Report row is stored, site admins (user reports) or
    community moderators (post / comment reports) get a Notification, the target's ``reports``
    counter is incremented and the session is committed. Community and Conversation reports
    are recognised but nothing is done for them.
    """
    summary = request_json['summary']
    reasons, description = summary[:15], summary[15:]

    if isinstance(reported, User):
        if reported.reports == -1:
            return
        db.session.add(Report(reasons=reasons, description=description,
                              type=0, reporter_id=user.id, suspect_user_id=reported.id,
                              source_instance_id=user.instance_id))

        # Notify site admin
        for admin in Site.admins():
            db.session.add(Notification(title='Reported user', url='/admin/reports', user_id=admin.id,
                                        author_id=user.id))
            admin.unread_notifications += 1

        reported.reports += 1
        db.session.commit()

    elif isinstance(reported, Post):
        if reported.reports == -1:
            return
        db.session.add(Report(reasons=reasons, description=description, type=1, reporter_id=user.id,
                              suspect_user_id=reported.author.id, suspect_post_id=reported.id,
                              suspect_community_id=reported.community.id, in_community_id=reported.community.id,
                              source_instance_id=user.instance_id))

        # Notify moderators
        for moderator in reported.community.moderators():
            db.session.add(Notification(user_id=moderator.user_id, title=_('A post has been reported'),
                                        url=f"https://{current_app.config['SERVER_NAME']}/post/{reported.id}",
                                        author_id=user.id))

        reported.reports += 1
        db.session.commit()

    elif isinstance(reported, PostReply):
        if reported.reports == -1:
            return
        parent_post = Post.query.get(reported.post_id)
        db.session.add(Report(reasons=reasons, description=description, type=2, reporter_id=user.id,
                              suspect_post_id=parent_post.id,
                              suspect_community_id=parent_post.community.id,
                              suspect_user_id=reported.author.id, suspect_post_reply_id=reported.id,
                              in_community_id=parent_post.community.id,
                              source_instance_id=user.instance_id))

        # Notify moderators
        for moderator in parent_post.community.moderators():
            db.session.add(Notification(user_id=moderator.user_id, title=_('A comment has been reported'),
                                        url=f"https://{current_app.config['SERVER_NAME']}/comment/{reported.id}",
                                        author_id=user.id))

        reported.reports += 1
        db.session.commit()

    elif isinstance(reported, Community):
        pass    # placeholder: community reports are not processed
    elif isinstance(reported, Conversation):
        pass    # placeholder: conversation reports are not processed
2024-04-04 21:36:03 +13:00
def get_redis_connection() -> redis.Redis:
    """Build a Redis client from the app's CACHE_REDIS_URL setting.

    URLs starting with 'unix://' are connected through a unix socket; everything else is
    treated as a host / port connection string.
    """
    redis_url = current_app.config['CACHE_REDIS_URL']

    if not redis_url.startswith('unix://'):
        host, port, db_index, password = parse_redis_socket_string(redis_url)
        return redis.Redis(host=host, port=port, db=db_index, password=password)

    socket_path, db_index, password = parse_redis_pipe_string(redis_url)
    return redis.Redis(unix_socket_path=socket_path, db=db_index, password=password)
def parse_redis_pipe_string(connection_string: str):
    """Split a ``unix://`` Redis URL into ``(socket_path, db, password)``.

    ``db`` comes from the ``db`` query parameter (0 when absent) and ``password`` from the
    ``password`` query parameter (None when absent). Any string that does not start with
    'unix://' yields None.
    """
    if not connection_string.startswith('unix://'):
        return None

    parts = urlparse(connection_string)
    options = parse_qs(parts.query)

    # parse_qs maps each key to a list of values; only the first one is used
    db_index = int(options.get('db', [0])[0])
    password = options.get('password', [None])[0]

    return parts.path, db_index, password
def parse_redis_socket_string(connection_string: str):
    """Split a host-based Redis URL into ``(host, port, db, password)``.

    ``db`` is the number in the URL path (0 when the path is empty). A username in the URL is
    not part of the result.
    """
    parts = urlparse(connection_string)

    password = parts.password
    host = parts.hostname
    port = parts.port

    # "/3" -> 3, "" or "/" -> 0
    db_segment = parts.path.lstrip('/')
    db_index = int(db_segment) if db_segment else 0

    return host, port, db_index, password
2023-11-23 15:10:44 +13:00
def lemmy_site_data():
    """Assemble the dict describing this instance in the shape of Lemmy's site data.

    Values come from ``g.site``, the app's SERVER_NAME, the counting helpers (users_total(),
    local_posts(), ...), every Language row and Site.admins(). Several fields are fixed
    literals. Returns the assembled dict.
    """
    site = g.site
    server_name = current_app.config['SERVER_NAME']
    logo = site.logo if site.logo else '/static/images/logo2.png'
    created_at = site.created_at.isoformat()
    updated_at = site.updated.isoformat()

    site_section = {
        "id": 1,
        "name": site.name,
        "sidebar": site.sidebar,
        "published": created_at,
        "updated": updated_at,
        "icon": f"https://{server_name}{logo}",
        "banner": "",
        "description": site.description,
        "actor_id": f"https://{server_name}/",
        "last_refreshed_at": updated_at,
        "inbox_url": f"https://{server_name}/inbox",
        "public_key": site.public_key,
        "instance_id": 1
    }

    local_site_section = {
        "id": 1,
        "site_id": 1,
        "site_setup": True,
        "enable_downvotes": site.enable_downvotes,
        "enable_nsfw": site.enable_nsfw,
        "enable_nsfl": site.enable_nsfl,
        "community_creation_admin_only": site.community_creation_admin_only,
        "require_email_verification": True,
        "application_question": site.application_question,
        "private_instance": False,
        "default_theme": "browser",
        "default_post_listing_type": "All",
        "hide_modlog_mod_names": True,
        "application_email_admins": True,
        "actor_name_max_length": 20,
        "federation_enabled": True,
        "captcha_enabled": True,
        "captcha_difficulty": "medium",
        "published": created_at,
        "updated": updated_at,
        "registration_mode": site.registration_mode,
        "reports_email_admins": site.reports_email_admins
    }

    rate_limit_section = {
        "id": 1,
        "local_site_id": 1,
        "message": 999,
        "message_per_second": 60,
        "post": 50,
        "post_per_second": 600,
        "register": 20,
        "register_per_second": 3600,
        "image": 100,
        "image_per_second": 3600,
        "comment": 100,
        "comment_per_second": 600,
        "search": 999,
        "search_per_second": 600,
        "published": created_at,
    }

    counts_section = {
        "id": 1,
        "site_id": 1,
        "users": users_total(),
        "posts": local_posts(),
        "comments": local_comments(),
        "communities": local_communities(),
        "users_active_day": active_day(),
        "users_active_week": active_week(),
        "users_active_month": active_month(),
        "users_active_half_year": active_half_year()
    }

    # Languages
    # hardcode English as the site language, for now. This will need to be an admin setting, soon.
    languages = Language.query.all()
    all_languages = [{'id': language.id, 'code': language.code, 'name': language.name}
                     for language in languages]
    discussion_languages = [language.id for language in languages
                            if language.code == 'und' or language.code == 'en']

    # Admins
    admins = []
    for admin in Site.admins():
        created = admin.created.isoformat() + 'Z'
        person = {
            "id": admin.id,
            "name": admin.user_name,
            "display_name": admin.display_name(),
            "avatar": 'https://' + server_name + admin.avatar_image(),
            "banned": admin.banned,
            "published": created,
            "updated": created,
            "actor_id": admin.public_url(),
            "local": True,
            "deleted": admin.deleted,
            "matrix_user_id": admin.matrix_user_id,
            "admin": True,
            "bot_account": admin.bot,
            "instance_id": 1
        }
        counts = {
            "id": admin.id,
            "person_id": admin.id,
            "post_count": 0,
            "post_score": 0,
            "comment_count": 0,
            "comment_score": 0
        }
        admins.append({'person': person, 'counts': counts})

    return {
        "site_view": {
            "site": site_section,
            "local_site": local_site_section,
            "local_site_rate_limit": rate_limit_section,
            "counts": counts_section
        },
        "admins": admins,
        "version": "1.0.0",
        "all_languages": all_languages,
        "discussion_languages": discussion_languages,
        "taglines": [],
        "custom_emojis": []
    }
2024-04-24 13:44:25 +01:00
def ensure_domains_match(activity: dict) -> bool:
    """Check that an activity's id and its actor live on the same domain.

    The actor is ``activity['actor']`` when that key is present; otherwise ``attributedTo``,
    either as a string or as the id of the first entry of type 'Person' in a list.

    :param activity: the ActivityPub object or activity to inspect.
    :return: True only when both id and actor are strings that parse to the same, non-empty
             network location. Missing, malformed or non-string values give False.
    """
    note_id = activity.get('id')

    note_actor = None
    if 'actor' in activity:
        note_actor = activity['actor']
    else:
        attributed_to = activity.get('attributedTo')
        if isinstance(attributed_to, str):
            note_actor = attributed_to
        elif isinstance(attributed_to, list):
            for entry in attributed_to:
                # lists may mix plain URIs with objects; only Person objects are considered
                if isinstance(entry, dict) and entry.get('type') == 'Person':
                    note_actor = entry.get('id')
                    break

    if not isinstance(note_id, str) or not isinstance(note_actor, str):
        return False

    id_domain = urlparse(note_id).netloc
    actor_domain = urlparse(note_actor).netloc

    # Two strings that are not absolute URLs both have an empty netloc - that is not a match
    return bool(id_domain) and id_domain == actor_domain
2024-05-09 17:54:30 +12:00
def can_edit(user_ap_id, post):
    """Tell whether the actor identified by ``user_ap_id`` may edit ``post``.

    The actor is looked up without being created. Editing is allowed for the post's author
    and for a moderator, owner or instance admin of the post's community, as reported by the
    community's is_moderator() / is_owner() / is_instance_admin() checks.
    """
    user = find_actor_or_create(user_ap_id, create_if_not_found=False)
    if not user:
        return False

    if post.user_id == user.id:
        return True

    community = post.community
    if community.is_moderator(user) or community.is_owner(user) or community.is_instance_admin(user):
        return True
    return False
def can_delete(user_ap_id, post):
    """Tell whether the actor may delete ``post``; the answer is whatever can_edit() gives."""
    allowed = can_edit(user_ap_id, post)
    return allowed
2024-05-26 15:53:17 +01:00
2024-11-23 19:26:25 +00:00
def _post_addressed_to(post_data: dict, profile_id: str) -> bool:
    """Return True when ``profile_id`` appears, ignoring case, in the 'to' or 'cc' of ``post_data``."""
    for field in ('to', 'cc'):
        recipients = post_data.get(field)
        if isinstance(recipients, str):
            recipients = [recipients]
        if isinstance(recipients, list):
            for recipient in recipients:
                if isinstance(recipient, str) and recipient.lower() == profile_id:
                    return True
    return False


def resolve_remote_post(uri: str, community_id: int, announce_actor=None, store_ap_json=False) -> Union[Post, PostReply, None]:
    """Fetch the ActivityPub object at ``uri`` and store it as a post or reply.

    :param uri: ActivityPub id of the remote object.
    :param community_id: id of the local Community row the object should belong to.
    :param announce_actor: actor that announced the object, if any. Unless its domain is
        'a.gup.pe' it must match the domain of ``uri``. When it is not given, the fetched
        object itself must show that it belongs to the community.
    :param store_ap_json: passed through to create_post() / create_post_reply().
    :return: the existing or newly created Post / PostReply, or None when the object could not
        be fetched, failed one of the checks, or could not be created.
    """
    post = Post.query.filter_by(ap_id=uri).first()
    if post:
        return post

    community = Community.query.get(community_id)
    uri_domain = urlparse(uri).netloc

    if announce_actor:
        announce_actor_domain = urlparse(announce_actor).netloc
        if announce_actor_domain != 'a.gup.pe' and announce_actor_domain != uri_domain:
            return None

    response = get_request(uri, headers={'Accept': 'application/activity+json'})
    try:
        if response.status_code != 200:
            return None
        post_data = response.json()
    except ValueError:      # body was not valid JSON
        return None
    finally:
        response.close()    # release the connection on every path, not only on success

    if not isinstance(post_data, dict) or 'id' not in post_data:
        return None

    # check again that it doesn't already exist (can happen with different but equivalent URLs)
    post = Post.query.filter_by(ap_id=post_data['id']).first()
    if post:
        return post

    # The author's domain must match the domain the object was fetched from
    actor = None
    actor_domain = None
    attributed_to = post_data.get('attributedTo')
    if isinstance(attributed_to, str):
        actor = attributed_to
        actor_domain = urlparse(attributed_to).netloc
    elif isinstance(attributed_to, list):
        for a in attributed_to:
            if isinstance(a, dict) and a.get('type') == 'Person':
                actor = a['id']
                actor_domain = urlparse(a['id']).netloc
                break
    if uri_domain != actor_domain:
        return None

    if community is None:
        return None         # nothing can be created without a community

    if not announce_actor:
        # make sure that the post actually belongs in the community a user says it does
        remote_community = None
        post_type = post_data.get('type')
        if post_type == 'Page':                                         # lemmy
            remote_community = post_data['audience'] if 'audience' in post_data else None
            if remote_community and remote_community.lower() != community.ap_profile_id:
                return None
        elif post_type == 'Video':                                      # peertube
            if isinstance(attributed_to, list):
                for a in attributed_to:
                    if isinstance(a, dict) and a.get('type') == 'Group':
                        remote_community = a['id']
                        break
            if remote_community and remote_community.lower() != community.ap_profile_id:
                return None
        else:                                                           # mastodon, etc
            # only top-level posts (an explicit null inReplyTo) that are addressed to the community
            if 'inReplyTo' not in post_data or post_data['inReplyTo'] is not None:
                return None
            if not _post_addressed_to(post_data, community.ap_profile_id):
                return None

    user = find_actor_or_create(actor)
    if not user:
        return None

    request_json = {
        'id': f"https://{uri_domain}/activities/create/{gibberish(15)}",
        'object': post_data
    }
    if 'inReplyTo' in post_data and post_data['inReplyTo']:
        post_reply = create_post_reply(store_ap_json, community, post_data['inReplyTo'], request_json, user)
        if post_reply:
            if 'published' in post_data:
                post_reply.posted_at = post_data['published']
                post_reply.post.last_active = post_data['published']
            post_reply.community.last_active = utcnow()
            db.session.commit()
            return post_reply
    else:
        post = create_post(store_ap_json, community, request_json, user)
        if post:
            if 'published' in post_data:
                post.posted_at = post_data['published']
                post.last_active = post_data['published']
            post.community.last_active = utcnow()
            db.session.commit()
            return post

    return None
2024-05-28 22:28:03 +01:00
def _find_community_in_recipients(post_data: dict) -> Union[Community, None]:
    """Return the first known Community addressed in the 'to'/'cc' fields of post_data.

    String-valued fields are checked before list-valued ones and 'to' before 'cc'.
    The public collection and any '/followers' collection are skipped.
    Returns None when no addressed recipient matches a stored community.
    """
    # Recipients are lower-cased before comparison, so the constant must be too.
    public = 'https://www.w3.org/ns/activitystreams#Public'.lower()

    candidates = []
    for field in ('to', 'cc'):
        if isinstance(post_data.get(field), str):
            candidates.append(post_data[field])
    for field in ('to', 'cc'):
        if isinstance(post_data.get(field), list):
            candidates.extend(post_data[field])

    for candidate in candidates:
        if not isinstance(candidate, str):
            continue
        community_id = candidate.lower()
        if community_id == public or community_id.endswith('/followers'):
            continue
        community = Community.query.filter_by(ap_profile_id=community_id).first()
        if community:
            return community
    return None


def resolve_remote_post_from_search(uri: str) -> Union[Post, None]:
    """Fetch the ActivityPub object at uri and turn it into a local Post.

    An already-stored post (matched by the given uri or by the fetched object's
    'id') is returned as is. Otherwise the object's author must live on the same
    domain as uri, and a community must be identifiable from the object.

    :param uri: URL of the remote post.
    :return: the existing or newly created Post, or None if the object could not
        be fetched, parsed, attributed, matched to a community or created.
    """
    post = Post.query.filter_by(ap_id=uri).first()
    if post:
        return post

    site = Site.query.get(1)
    uri_domain = urlparse(uri).netloc

    response = get_request(uri, headers={'Accept': 'application/activity+json'})
    try:
        if response.status_code != 200:
            return None
        post_data = response.json()
    except ValueError:
        # the body was not valid JSON
        return None
    finally:
        response.close()

    if not isinstance(post_data, dict) or 'id' not in post_data:
        return None

    # check again that it doesn't already exist (can happen with different but equivalent URLs)
    post = Post.query.filter_by(ap_id=post_data['id']).first()
    if post:
        return post

    # find the author of the post. Make sure their domain matches the site hosting it to mitigate impersonation attempts
    actor = None
    attributed_to = post_data.get('attributedTo')
    if isinstance(attributed_to, str):
        actor = attributed_to
    elif isinstance(attributed_to, list):
        for a in attributed_to:
            if isinstance(a, dict) and a.get('type') == 'Person' and isinstance(a.get('id'), str):
                actor = a['id']
                break
    # Without an author the post can be neither verified nor attributed.
    if actor is None or urlparse(actor).netloc != uri_domain:
        return None

    # find the community the post was submitted to
    post_type = post_data.get('type')
    community = None
    if post_type == 'Page' and 'audience' in post_data:                    # lemmy
        community = find_actor_or_create(post_data['audience'], community_only=True)

    if not community and post_type == 'Video' and isinstance(attributed_to, list):    # peertube
        for a in attributed_to:
            if isinstance(a, dict) and a.get('type') == 'Group' and 'id' in a:
                community = find_actor_or_create(a['id'], community_only=True)
                if community:
                    break

    if not community:                                                      # mastodon, etc
        # only top-level posts (inReplyTo present and null) are resolved this way
        if 'inReplyTo' not in post_data or post_data['inReplyTo'] is not None:
            return None
        community = _find_community_in_recipients(post_data)

    if not community:
        return None

    activity_log = ActivityPubLog(direction='in', activity_id=post_data['id'], activity_type='Resolve Post', result='failure')
    if site.log_activitypub_json:
        activity_log.activity_json = json.dumps(post_data)
    db.session.add(activity_log)

    user = find_actor_or_create(actor)
    if not user:
        return None

    request_json = {
        'id': f"https://{uri_domain}/activities/create/{gibberish(15)}",
        'object': post_data,
    }
    # NOTE(review): other callers in this file pass a `store_ap_json` value as the first
    # argument of create_post - confirm that passing the log entry here is still intended.
    post = create_post(activity_log, community, request_json, user)
    if post:
        if 'published' in post_data:
            post.posted_at = post_data['published']
            post.last_active = post_data['published']
            db.session.commit()
        return post

    return None
2024-05-31 22:05:44 +01:00
# This is for followers on microblog apps
# Used to let them know a Poll has been updated with a new vote
# The plan is to also use it for activities on local user's posts that aren't understood by being Announced (anything beyond the initial Create)
# This would need for posts to have things like a 'Replies' collection and a 'Likes' collection, so these can be downloaded when the post updates
# Using collections like this (as PeerTube does) circumvents the problem of not having a remote user's private key.
# The problem of what to do for remote user's activity on a remote user's post in a local community still exists (can't Announce it, can't inform of post update)
2024-06-01 19:52:17 +12:00
def inform_followers_of_post_update(post_id: int, sending_instance_id: int):
    """Dispatch inform_followers_of_post_update_task for the given post.

    When the app is in debug mode the task is called directly in-process,
    otherwise it is queued through its `.delay()` method.

    :param post_id: id of the post that changed.
    :param sending_instance_id: instance id handed through to the task unchanged.
    """
    dispatch = (
        inform_followers_of_post_update_task
        if current_app.debug
        else inform_followers_of_post_update_task.delay
    )
    dispatch(post_id, sending_instance_id)
2024-05-31 22:05:44 +01:00
@celery.task
def inform_followers_of_post_update_task(post_id: int, sending_instance_id: int):
    """Send an Update activity for a post to microblog instances that follow it.

    The activity is signed with the post author's key and posted to the inbox of
    every instance whose software is in MICROBLOG_APPS and that hosts either a
    follower of the author or a non-banned member of the post's community.
    The instance identified by sending_instance_id is skipped, and each instance
    receives the activity at most once.

    :param post_id: id of the post that changed; nothing happens if it no longer exists.
    :param sending_instance_id: id of the instance to exclude from delivery.
    """
    post = Post.query.get(post_id)
    if post is None:
        # the post may have been deleted between queueing and execution
        return

    now = ap_datetime(utcnow())
    page_json = post_to_page(post)
    page_json['updated'] = now
    update_json = {
        'id': f"https://{current_app.config['SERVER_NAME']}/activities/update/{gibberish(15)}",
        'type': 'Update',
        'actor': post.author.public_url(),
        'audience': post.community.public_url(),
        'to': ['https://www.w3.org/ns/activitystreams#Public'],
        'published': now,
        'cc': [
            post.author.followers_url(), post.community.ap_followers_url
        ],
        'object': page_json,
    }
    key_id = post.author.public_url() + '#main-key'

    # instances hosting followers of the author
    user_follower_instances = Instance.query.join(User, User.instance_id == Instance.id) \
        .join(UserFollower, UserFollower.remote_user_id == User.id) \
        .filter(UserFollower.local_user_id == post.user_id, Instance.software.in_(MICROBLOG_APPS))

    # instances hosting members of the community
    community_instances = Instance.query.join(User, User.instance_id == Instance.id) \
        .join(CommunityMember, CommunityMember.user_id == User.id) \
        .filter(CommunityMember.community_id == post.community.id, CommunityMember.is_banned == False) \
        .filter(Instance.software.in_(MICROBLOG_APPS))  # noqa: E712 - SQL expression, not a Python comparison

    # Seeding with the sender excludes it; the set also prevents double delivery
    # to an instance that appears in both queries.
    already_handled = {sending_instance_id}
    for instances in (user_follower_instances, community_instances):    # user followers first
        for instance in instances:
            if instance.id in already_handled:
                continue
            already_handled.add(instance.id)
            try:
                post_request(instance.inbox, update_json, post.author.private_key, key_id)
            except Exception as e:
                # delivery is best-effort: record the failure and carry on with the rest
                current_app.logger.warning(f'Could not send post update to {instance.inbox}: {e}')
2024-11-18 15:56:47 +00:00
def log_incoming_ap(id, aplog_type, aplog_result, request_json, message=None):
    """Store an ActivityPubLog row for an incoming activity, if logging applies.

    A row is written only when APLOG_IN, aplog_type[0] and aplog_result[0] are
    all truthy. aplog_type[1] and aplog_result[1] are stored as the row's
    activity type and result.

    :param id: value stored as the row's activity_id.
    :param aplog_type: indexable pair (enabled flag, activity type).
    :param aplog_result: indexable pair (enabled flag, result).
    :param request_json: activity payload; stored as a JSON string when truthy.
    :param message: optional text stored as the row's exception_message.
    """
    logging_enabled = APLOG_IN
    if not (logging_enabled and aplog_type[0] and aplog_result[0]):
        return

    entry = ActivityPubLog(direction='in', activity_id=id, activity_type=aplog_type[1], result=aplog_result[1])
    if message:
        entry.exception_message = message
    if request_json:
        entry.activity_json = json.dumps(request_json)

    db.session.add(entry)
    db.session.commit()
2024-11-18 22:05:25 +00:00
def find_community_ap_id(request_json):
    """Work out which community an incoming activity is addressed to.

    Checks are made in this order:
      1. the 'audience', 'cc' and 'to' fields of the activity, then of its
         embedded object - the first address that matches a stored Community
         is returned exactly as it was written in the activity;
      2. the community of the post or comment named by the object's 'inReplyTo';
      3. for 'Video' objects (PeerTube), the id of the first 'Group' entry in
         'attributedTo'.

    :param request_json: the activity as a dict.
    :return: the community's ActivityPub id, or None if none could be determined
        (including when 'object' is absent or is a bare URI rather than a dict).
    """
    activity_object = request_json['object'] if 'object' in request_json else None

    sources = [request_json]
    if isinstance(activity_object, dict):
        sources.append(activity_object)

    for source in sources:
        for location in ('audience', 'cc', 'to'):
            value = source.get(location)
            if isinstance(value, str):
                candidates = [value]
            elif isinstance(value, list):
                candidates = value
            else:
                continue
            for candidate in candidates:
                if not isinstance(candidate, str):
                    continue
                # skip the public collection and followers collections
                if candidate.startswith('https://www.w3.org') or candidate.endswith('/followers'):
                    continue
                if Community.query.filter_by(ap_profile_id=candidate.lower()).first():
                    return candidate

    # Everything below inspects fields of the object, so it has to be a dict.
    if not isinstance(activity_object, dict):
        return None

    in_reply_to = activity_object.get('inReplyTo')
    if in_reply_to is not None:
        post_being_replied_to = Post.query.filter_by(ap_id=in_reply_to).first()
        if post_being_replied_to:
            return post_being_replied_to.community.ap_profile_id
        comment_being_replied_to = PostReply.query.filter_by(ap_id=in_reply_to).first()
        if comment_being_replied_to:
            return comment_being_replied_to.community.ap_profile_id

    if activity_object.get('type') == 'Video':            # PeerTube
        attributed_to = activity_object.get('attributedTo')
        if isinstance(attributed_to, list):
            for a in attributed_to:
                if isinstance(a, dict) and a.get('type') == 'Group' and 'id' in a:
                    return a['id']

    return None