mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 19:36:56 -08:00
Merge pull request 'Adding admin remote instance scan function' (#368) from JollyDevelopment/pyfedi:jollydev/add-admin-remote-instance-scan-01 into main
Reviewed-on: https://codeberg.org/rimu/pyfedi/pulls/368
This commit is contained in:
commit
b98958e6ac
4 changed files with 277 additions and 8 deletions
|
@ -56,6 +56,13 @@ class PreLoadCommunitiesForm(FlaskForm):
|
|||
communities_num = IntegerField(_l('Number of Communities to add'), default=25)
|
||||
pre_load_submit = SubmitField(_l('Add Communities'))
|
||||
|
||||
# Admin form for the "Remote Server Scan" feature: the federation admin view
# instantiates it, and on submit scans one remote lemmy or mbin instance for
# local communities to subscribe to.
class RemoteInstanceScanForm(FlaskForm):
|
||||
# Base URL of the remote instance (required). The view expects the form
# "https://server-name.tld" with no trailing slash or path.
remote_url = StringField(_l('Remote Server'), validators=[DataRequired()])
|
||||
# Upper bound on how many of the filtered candidate communities get joined.
communities_requested = IntegerField(_l('Number of Communities to add'), default=25)
|
||||
# Communities whose post count is below this value are skipped by the view.
minimum_posts = IntegerField(_l('Communities must have at least this many posts'), default=100)
|
||||
# Compared with weekly active users for lemmy; for mbin the view compares it
# with the magazine's subscriber count instead.
minimum_active_users = IntegerField(_l('Communities must have at least this many active users in the past week.'), default=100)
|
||||
# When checked, the view only flashes the scan statistics and subscribes to nothing.
dry_run = BooleanField(_l('Dry Run'))
|
||||
# The view tests this button's data to tell which form on the page was submitted.
remote_scan_submit = SubmitField(_l('Scan'))
|
||||
|
||||
class ImportExportBannedListsForm(FlaskForm):
|
||||
import_file = FileField(_l('Import Bans List Json File'))
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import re
|
||||
from datetime import timedelta
|
||||
from time import sleep
|
||||
from io import BytesIO
|
||||
|
@ -10,6 +11,7 @@ from flask_babel import _
|
|||
from slugify import slugify
|
||||
from sqlalchemy import text, desc, or_
|
||||
from PIL import Image
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from app import db, celery, cache
|
||||
from app.activitypub.routes import process_inbox_request, process_delete_request, replay_inbox_request
|
||||
|
@ -17,7 +19,7 @@ from app.activitypub.signature import post_request, default_context
|
|||
from app.activitypub.util import instance_allowed, instance_blocked, extract_domain_and_actor
|
||||
from app.admin.forms import FederationForm, SiteMiscForm, SiteProfileForm, EditCommunityForm, EditUserForm, \
|
||||
EditTopicForm, SendNewsletterForm, AddUserForm, PreLoadCommunitiesForm, ImportExportBannedListsForm, \
|
||||
EditInstanceForm
|
||||
EditInstanceForm, RemoteInstanceScanForm
|
||||
from app.admin.util import unsubscribe_from_everything_then_delete, unsubscribe_from_community, send_newsletter, \
|
||||
topics_for_form
|
||||
from app.community.util import save_icon_file, save_banner_file, search_for_community
|
||||
|
@ -196,6 +198,7 @@ def admin_federation():
|
|||
form = FederationForm()
|
||||
preload_form = PreLoadCommunitiesForm()
|
||||
ban_lists_form = ImportExportBannedListsForm()
|
||||
remote_scan_form = RemoteInstanceScanForm()
|
||||
|
||||
# this is the pre-load communities button
|
||||
if preload_form.pre_load_submit.data and preload_form.validate():
|
||||
|
@ -315,6 +318,250 @@ def admin_federation():
|
|||
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
# this is the remote server scan
|
||||
elif remote_scan_form.remote_scan_submit.data and remote_scan_form.validate():
|
||||
# filters to be used later
|
||||
already_known = list(db.session.execute(text('SELECT ap_public_url FROM "community"')).scalars())
|
||||
banned_urls = list(db.session.execute(text('SELECT domain FROM "banned_instances"')).scalars())
|
||||
seven_things_plus = [
|
||||
'shit', 'piss', 'fuck',
|
||||
'cunt', 'cocksucker', 'motherfucker', 'tits',
|
||||
'memes', 'piracy', '196', 'greentext', 'usauthoritarianism',
|
||||
'enoughmuskspam', 'political_weirdos', '4chan'
|
||||
]
|
||||
is_lemmy = False
|
||||
is_mbin = False
|
||||
|
||||
|
||||
# get the remote_url data
|
||||
remote_url = remote_scan_form.remote_url.data
|
||||
|
||||
# test to make sure it's a valid https:// url with a fully-qualified domain name
|
||||
regex_pattern = '^(https:\/\/)(?=.{1,255}$)((.{1,63}\.){1,127}(?![0-9]*$)[a-z0-9-]+\.?)$'
|
||||
result = re.match(regex_pattern, remote_url)
|
||||
if result is None:
|
||||
flash(_(f'{remote_url} does not appear to be a valid url. Make sure input is in the form "https://server-name.tld" without trailing slashes or paths.'))
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
# check if it's a banned instance
|
||||
# Parse the URL
|
||||
parsed_url = urlparse(remote_url)
|
||||
# Extract the server domain name
|
||||
server_domain = parsed_url.netloc
|
||||
if server_domain in banned_urls:
|
||||
flash(_(f'{remote_url} is a banned instance.'))
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
# get dry run
|
||||
dry_run = remote_scan_form.dry_run.data
|
||||
|
||||
# get the number of follows requested
|
||||
communities_requested = remote_scan_form.communities_requested.data
|
||||
|
||||
# get the minimums
|
||||
min_posts = remote_scan_form.minimum_posts.data
|
||||
min_users = remote_scan_form.minimum_active_users.data
|
||||
|
||||
# get the nodeinfo
|
||||
resp = get_request(f'{remote_url}/.well-known/nodeinfo')
|
||||
nodeinfo_dict = json.loads(resp.text)
|
||||
|
||||
# check the ['links'] for instanceinfo url
|
||||
schema2p0 = "http://nodeinfo.diaspora.software/ns/schema/2.0"
|
||||
schema2p1 = "http://nodeinfo.diaspora.software/ns/schema/2.1"
|
||||
for e in nodeinfo_dict['links']:
|
||||
if e['rel'] == schema2p0 or e['rel'] == schema2p1:
|
||||
remote_instanceinfo_url = e["href"]
|
||||
|
||||
# get the instanceinfo
|
||||
resp = get_request(remote_instanceinfo_url)
|
||||
instanceinfo_dict = json.loads(resp.text)
|
||||
|
||||
# determine the instance software
|
||||
instance_software_name = instanceinfo_dict['software']['name']
|
||||
# instance_software_version = instanceinfo_dict['software']['version']
|
||||
|
||||
# if the instance is not running lemmy or mbin break for now as
|
||||
# we don't yet support others for scanning
|
||||
if instance_software_name == "lemmy":
|
||||
is_lemmy = True
|
||||
elif instance_software_name == "mbin":
|
||||
is_mbin = True
|
||||
else:
|
||||
flash(_(f"{remote_url} does not appear to be a lemmy or mbin instance."))
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
if is_lemmy:
|
||||
# lemmy has a hard-coded upper limit of 50 communities
|
||||
# in their api response
|
||||
# loop through and send off requests to the remote endpoint for 50 communities at a time
|
||||
comms_list = []
|
||||
page = 1
|
||||
get_more_communities = True
|
||||
while get_more_communities:
|
||||
params = {"sort":"Active","type_":"Local","limit":"50","page":f"{page}","show_nsfw":"false"}
|
||||
resp = get_request(f"{remote_url}/api/v3/community/list", params=params)
|
||||
page_dict = json.loads(resp.text)
|
||||
# get the individual communities out of the communities[] list in the response and
|
||||
# add them to a holding list[] of our own
|
||||
for c in page_dict["communities"]:
|
||||
comms_list.append(c)
|
||||
# check the amount of items in the page_dict['communities'] list
|
||||
# if it's less than 50 then we know it's the last page of communities
|
||||
# so we break the loop
|
||||
if len(page_dict['communities']) < 50:
|
||||
get_more_communities = False
|
||||
else:
|
||||
page += 1
|
||||
|
||||
# filter out the communities
|
||||
already_known_count = nsfw_count = low_content_count = low_active_users_count = bad_words_count = 0
|
||||
candidate_communities = []
|
||||
for community in comms_list:
|
||||
# sort out already known communities
|
||||
if community['community']['actor_id'] in already_known:
|
||||
already_known_count += 1
|
||||
continue
|
||||
# sort out any that have less than minimum posts
|
||||
elif community['counts']['posts'] < min_posts:
|
||||
low_content_count += 1
|
||||
continue
|
||||
# sort out any that do not have greater than the requested active users over the past week
|
||||
elif community['counts']['users_active_week'] < min_users:
|
||||
low_active_users_count += 1
|
||||
continue
|
||||
# sort out the 'seven things you can't say on tv' names (cursewords), plus some
|
||||
# "low effort" communities
|
||||
if any(badword in community['community']['name'].lower() for badword in seven_things_plus):
|
||||
bad_words_count += 1
|
||||
continue
|
||||
else:
|
||||
candidate_communities.append(community)
|
||||
|
||||
# get the community urls to join
|
||||
community_urls_to_join = []
|
||||
|
||||
# if the admin user wants more added than we have, then just add all of them
|
||||
if communities_requested > len(candidate_communities):
|
||||
communities_to_add = len(candidate_communities)
|
||||
else:
|
||||
communities_to_add = communities_requested
|
||||
|
||||
# make the list of urls
|
||||
for i in range(communities_to_add):
|
||||
community_urls_to_join.append(candidate_communities[i]['community']['actor_id'].lower())
|
||||
|
||||
# if it's a dry run, just return the stats
|
||||
if dry_run:
|
||||
message = f"Dry-Run for {remote_url}: \
|
||||
Local Communities on the server: {len(comms_list)}, \
|
||||
Communities we already have: {already_known_count}, \
|
||||
Communities below minimum posts: {low_content_count}, \
|
||||
Communities below minimum users: {low_active_users_count}, \
|
||||
Candidate Communities based on filters: {len(candidate_communities)}, \
|
||||
Communities to join request: {communities_requested}, \
|
||||
Communities to join based on current filters: {len(community_urls_to_join)}."
|
||||
flash(_(message))
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
if is_mbin:
|
||||
# loop through and send the right number of requests to the remote endpoint for mbin
|
||||
# mbin does not have the hard-coded limit, but lets stick with 50 to match lemmy
|
||||
mags_list = []
|
||||
page = 1
|
||||
get_more_magazines = True
|
||||
while get_more_magazines:
|
||||
params = {"p":f"{page}","perPage":"50","sort":"active","federation":"local","hide_adult":"hide"}
|
||||
resp = get_request(f"{remote_url}/api/magazines", params=params)
|
||||
page_dict = json.loads(resp.text)
|
||||
# get the individual magazines out of the items[] list in the response and
|
||||
# add them to a holding list[] of our own
|
||||
for m in page_dict['items']:
|
||||
mags_list.append(m)
|
||||
# check the amount of items in the page_dict['items'] list
|
||||
# if it's less than 50 then we know it's the last page of magazines
|
||||
# so we break the loop
|
||||
if len(page_dict['items']) < 50:
|
||||
get_more_magazines = False
|
||||
else:
|
||||
page += 1
|
||||
|
||||
# filter out the magazines
|
||||
already_known_count = low_content_count = low_subscribed_users_count = bad_words_count = 0
|
||||
candidate_communities = []
|
||||
for magazine in mags_list:
|
||||
# sort out already known communities
|
||||
if magazine['apProfileId'] in already_known:
|
||||
already_known_count += 1
|
||||
continue
|
||||
# sort out any that have less than minimum posts
|
||||
elif magazine['entryCount'] < min_posts:
|
||||
low_content_count += 1
|
||||
continue
|
||||
# sort out any that do not have greater than the requested users over the past week
|
||||
# mbin does not show active users here, so its based on subscriber count
|
||||
elif magazine['subscriptionsCount'] < min_users:
|
||||
low_subscribed_users_count += 1
|
||||
continue
|
||||
# sort out the 'seven things you can't say on tv' names (cursewords), plus some
|
||||
# "low effort" communities
|
||||
if any(badword in magazine['name'].lower() for badword in seven_things_plus):
|
||||
bad_words_count += 1
|
||||
continue
|
||||
else:
|
||||
candidate_communities.append(magazine)
|
||||
|
||||
# get the community urls to join
|
||||
community_urls_to_join = []
|
||||
|
||||
# if the admin user wants more added than we have, then just add all of them
|
||||
if communities_requested > len(candidate_communities):
|
||||
magazines_to_add = len(candidate_communities)
|
||||
else:
|
||||
magazines_to_add = communities_requested
|
||||
|
||||
# make the list of urls
|
||||
for i in range(magazines_to_add):
|
||||
community_urls_to_join.append(candidate_communities[i]['apProfileId'].lower())
|
||||
|
||||
# if it's a dry run, just return the stats
|
||||
if dry_run:
|
||||
message = f"Dry-Run for {remote_url}: \
|
||||
Local Magazines on the server: {len(mags_list)}, \
|
||||
Magazines we already have: {already_known_count}, \
|
||||
Magazines below minimum posts: {low_content_count}, \
|
||||
Magazines below minimum users: {low_subscribed_users_count}, \
|
||||
Candidate Magazines based on filters: {len(candidate_communities)}, \
|
||||
Magazines to join request: {communities_requested}, \
|
||||
Magazines to join based on current filters: {len(community_urls_to_join)}."
|
||||
flash(_(message))
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
user = User.query.get(1)
|
||||
remote_scan_messages = []
|
||||
for community in community_urls_to_join:
|
||||
# get the relevant url bits
|
||||
server, community = extract_domain_and_actor(community)
|
||||
# find the community
|
||||
new_community = search_for_community('!' + community + '@' + server)
|
||||
# subscribe to the community
|
||||
# capture the messages returned by do_subscribe
|
||||
# and show to user if instance is in debug mode
|
||||
if current_app.debug:
|
||||
message = do_subscribe(new_community.ap_id, user.id, admin_preload=True)
|
||||
remote_scan_messages.append(message)
|
||||
else:
|
||||
message_we_wont_do_anything_with = do_subscribe.delay(new_community.ap_id, user.id, admin_preload=True)
|
||||
|
||||
if current_app.debug:
|
||||
flash(_('Results: %(results)s', results=str(remote_scan_messages)))
|
||||
else:
|
||||
flash(
|
||||
_('Based on current filters, the subscription process for %(communities_to_join)d of %(candidate_communities)d communities launched in background, check admin/activities for details',
|
||||
communities_to_join=len(community_urls_to_join), candidate_communities=len(candidate_communities)))
|
||||
|
||||
return redirect(url_for('admin.admin_federation'))
|
||||
|
||||
# this is the import bans button
|
||||
elif ban_lists_form.import_submit.data and ban_lists_form.validate():
|
||||
import_file = request.files['import_file']
|
||||
|
@ -440,7 +687,7 @@ def admin_federation():
|
|||
|
||||
return render_template('admin/federation.html', title=_('Federation settings'),
|
||||
form=form, preload_form=preload_form, ban_lists_form=ban_lists_form,
|
||||
current_app_debug=current_app.debug,
|
||||
remote_scan_form=remote_scan_form, current_app_debug=current_app.debug,
|
||||
moderating_communities=moderating_communities(current_user.get_id()),
|
||||
joined_communities=joined_communities(current_user.get_id()),
|
||||
menu_topics=menu_topics(),
|
||||
|
|
|
@ -8,8 +8,10 @@ from flask_babel import _
|
|||
|
||||
from app import db, cache, celery
|
||||
from app.activitypub.signature import post_request, default_context
|
||||
from app.activitypub.util import extract_domain_and_actor
|
||||
|
||||
from app.models import User, Community, Instance, Site, ActivityPubLog, CommunityMember, Language
|
||||
from app.utils import gibberish, topic_tree
|
||||
from app.utils import gibberish, topic_tree, get_request
|
||||
|
||||
|
||||
def unsubscribe_from_everything_then_delete(user_id):
|
||||
|
@ -124,5 +126,3 @@ def topics_for_form_children(topics, current_topic: int, depth: int) -> List[Tup
|
|||
result.extend(topics_for_form_children(topic['children'], current_topic, depth + 1))
|
||||
return result
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
<hr />
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<p>Import / Export Bans</p>
|
||||
<h4>{{ _('Import / Export Bans') }}</h4>
|
||||
<p>Use this to import or export banned instances, domains, tags, and / or users.</p>
|
||||
<p>JSON format:</p>
|
||||
<pre><code>
|
||||
{
|
||||
|
@ -38,9 +39,23 @@
|
|||
<hr />
|
||||
<div class="row">
|
||||
<div class="column">
|
||||
<p>Use this to "pre-load" known threadiverse communities, as ranked by posts and activity. The list of communities pulls from the same list as <a href="https://lemmyverse.net/communities">LemmyVerse</a>. NSFW communities and communities from banned instances are excluded.</p>
|
||||
<h4>{{ _('Remote Server Scan') }}</h4>
|
||||
<p>Use this to scan a remote lemmy server and "pre-load" its communities, as ranked by posts and activity. NSFW communities and communities from banned instances are excluded. Communities with less than 100 posts and less than 500 active users in the past week are excluded.</p>
|
||||
<p>Input should be in the form of <b>https://server-name.tld</b></p>
|
||||
{% if current_app_debug %}
|
||||
<p>*** This instance is in development mode. Loading more than 6 communities here could cause timeouts, depending on how your networking is setup. ***</p>
|
||||
<p>*** This instance is in development mode. This function could cause timeouts depending on how your networking is setup. ***</p>
|
||||
{% endif %}
|
||||
{{ render_form(remote_scan_form) }}
|
||||
</div>
|
||||
</div>
|
||||
<hr />
|
||||
|
||||
<div class="row">
|
||||
<div class="column">
|
||||
<h4>{{ _('Load From Lemmyverse Data') }}</h4>
|
||||
<p>Use this to "pre-load" known threadiverse communities, as ranked by posts and activity. The list of communities pulls from the same list as <a href="https://lemmyverse.net/communities">LemmyVerse</a>. NSFW communities and communities from banned instances are excluded. Communities with less than 100 posts and less than 500 active users in the past week are excluded.</p>
|
||||
{% if current_app_debug %}
|
||||
<p>*** This instance is in development mode. This function could cause timeouts depending on how your networking is setup. ***</p>
|
||||
{% endif %}
|
||||
{{ render_form(preload_form) }}
|
||||
</div>
|
||||
|
|
Loading…
Add table
Reference in a new issue