From 93ee376525ae6dd3d697994e31c7cab9e2a12940 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Tue, 31 Dec 2024 13:55:24 +1300 Subject: [PATCH] utilize mastodon-style regular expressions for defederation --- app/activitypub/routes.py | 6 +++--- app/activitypub/util.py | 16 +++------------- app/admin/routes.py | 6 +++--- app/utils.py | 13 +++++++++++-- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/app/activitypub/routes.py b/app/activitypub/routes.py index 1873e7aa..9a13207f 100644 --- a/app/activitypub/routes.py +++ b/app/activitypub/routes.py @@ -19,7 +19,7 @@ from app.models import User, Community, CommunityJoinRequest, CommunityMember, C PostReply, Instance, PostVote, PostReplyVote, File, AllowedInstances, BannedInstances, utcnow, Site, Notification, \ ChatMessage, Conversation, UserFollower, UserBlock, Poll, PollChoice from app.activitypub.util import public_key, users_total, active_half_year, active_month, local_posts, local_comments, \ - post_to_activity, find_actor_or_create, instance_blocked, find_reply_parent, find_liked_object, \ + post_to_activity, find_actor_or_create, find_reply_parent, find_liked_object, \ lemmy_site_data, is_activitypub_request, delete_post_or_comment, community_members, \ user_removed_from_remote_server, create_post, create_post_reply, update_post_reply_from_activity, \ update_post_from_activity, undo_vote, undo_downvote, post_to_page, get_redis_connection, find_reported_object, \ @@ -29,7 +29,7 @@ from app.activitypub.util import public_key, users_total, active_half_year, acti from app.utils import gibberish, get_setting, render_template, \ community_membership, ap_datetime, ip_address, can_downvote, \ can_upvote, can_create_post, awaken_dormant_instance, shorten_string, can_create_post_reply, sha256_digest, \ - community_moderators, markdown_to_html, html_to_text, add_to_modlog_activitypub + community_moderators, html_to_text, add_to_modlog_activitypub, instance_banned @bp.route('/testredis') @@ -1311,7 +1311,7 @@ def announce_activity_to_followers(community, creator, activity): awaken_dormant_instance(instance) # All good? Send! - if instance and instance.online() and not instance_blocked(instance.inbox): + if instance and instance.online() and not instance_banned(instance.inbox): if creator.instance_id != instance.id: # don't send it to the instance that hosts the creator as presumably they already have the content send_to_remote_instance(instance.id, community.id, announce_activity) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index e7da30dc..2ae26693 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -2,6 +2,7 @@ from __future__ import annotations import html import os +import re from datetime import timedelta, datetime, timezone from random import randint from typing import Union, Tuple, List @@ -32,7 +33,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar microblog_content_to_title, is_video_url, \ notification_subscribers, communities_banned_from, actor_contains_blocked_words, \ html_to_text, add_to_modlog_activitypub, joined_communities, \ - moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse + moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse, instance_banned from sqlalchemy import or_ @@ -236,17 +237,6 @@ def banned_user_agents(): return [] # todo: finish this function -@cache.memoize(150) -def instance_blocked(host: str) -> bool: # see also utils.instance_banned() - if host is None or host == '': - return True - host = host.lower() - if 'https://' in host or 'http://' in host: - host = urlparse(host).hostname - instance = BannedInstances.query.filter_by(domain=host.strip()).first() - return instance is not None - - @cache.memoize(150) def instance_allowed(host: str) -> bool: if host is None or host == '': @@ -282,7 +272,7 @@ def find_actor_or_create(actor: str, create_if_not_found=True, community_only=Fa if not instance_allowed(server): return None else: - if instance_blocked(server): + if instance_banned(server): return None if actor_contains_blocked_words(actor): return None diff --git a/app/admin/routes.py b/app/admin/routes.py index f458464e..6514dccd 100644 --- a/app/admin/routes.py +++ b/app/admin/routes.py @@ -16,7 +16,7 @@ from urllib.parse import urlparse from app import db, celery, cache from app.activitypub.routes import process_inbox_request, process_delete_request, replay_inbox_request from app.activitypub.signature import post_request, default_context -from app.activitypub.util import instance_allowed, instance_blocked, extract_domain_and_actor +from app.activitypub.util import instance_allowed, extract_domain_and_actor from app.admin.forms import FederationForm, SiteMiscForm, SiteProfileForm, EditCommunityForm, EditUserForm, \ EditTopicForm, SendNewsletterForm, AddUserForm, PreLoadCommunitiesForm, ImportExportBannedListsForm, \ EditInstanceForm, RemoteInstanceScanForm @@ -32,7 +32,7 @@ from app.models import AllowedInstances, BannedInstances, ActivityPubLog, utcnow from app.utils import render_template, permission_required, set_setting, get_setting, gibberish, markdown_to_html, \ moderating_communities, joined_communities, finalize_user_setup, theme_list, blocked_phrases, blocked_referrers, \ topic_tree, languages_for_form, menu_topics, ensure_directory_exists, add_to_modlog, get_request, file_get_contents, \ - download_defeds + download_defeds, instance_banned from app.admin import bp @@ -666,7 +666,7 @@ def admin_federation(): for banned in form.blocklist.data.split('\n'): if banned.strip(): db.session.add(BannedInstances(domain=banned.strip())) - cache.delete_memoized(instance_blocked, banned.strip()) + cache.delete_memoized(instance_banned, banned.strip()) # update and sync defederation subscriptions db.session.execute(text('DELETE FROM banned_instances WHERE subscription_id is not null')) diff --git a/app/utils.py b/app/utils.py index 68fd66a7..81aff02d 100644 --- a/app/utils.py +++ b/app/utils.py @@ -656,12 +656,21 @@ def user_ip_banned() -> bool: return current_ip_address in banned_ip_addresses() -@cache.memoize(timeout=60) +@cache.memoize(timeout=150) def instance_banned(domain: str) -> bool: # see also activitypub.util.instance_blocked() if domain is None or domain == '': return False + domain = domain.lower().strip() + if 'https://' in domain or 'http://' in domain: + domain = urlparse(domain).hostname banned = BannedInstances.query.filter_by(domain=domain).first() - return banned is not None + if banned is not None: + return True + + # Mastodon sometimes bans with a * in the domain name, meaning "any letter", e.g. "cum.**mp" + regex_patterns = [re.compile(f"^{cond.domain.replace('*', '[a-zA-Z0-9]')}$") for cond in + BannedInstances.query.filter(BannedInstances.domain.like('%*%')).all()] + return any(pattern.match(domain) for pattern in regex_patterns) def user_cookie_banned() -> bool: