diff --git a/app/activitypub/util.py b/app/activitypub/util.py
index 85a92e53..239dedcf 100644
--- a/app/activitypub/util.py
+++ b/app/activitypub/util.py
@@ -24,7 +24,8 @@ import pytesseract
 
 from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
     is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
-    shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link
+    shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
+    blocked_phrases
 
 
 def public_key():
@@ -1177,9 +1178,11 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
             post_reply.body_html = allowlist_html(request_json['object']['content'])
             post_reply.body = html_to_markdown(post_reply.body_html)
         if post_id is not None:
-            # block shitpost flood
-            if post_reply.body and "SNEED'S" in post_reply.body:
-                return None
+            # Discard post_reply if it contains certain phrases. Good for stopping spam floods.
+            if post_reply.body:
+                for blocked_phrase in blocked_phrases():
+                    if blocked_phrase in post_reply.body:
+                        return None
             post = Post.query.get(post_id)
             if post.comments_enabled:
                 anchor = None
@@ -1273,6 +1276,15 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
     elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin
         post.body_html = allowlist_html(request_json['object']['content'])
         post.body = html_to_markdown(post.body_html)
+    # Discard post if it contains certain phrases. Good for stopping spam floods.
+    blocked_phrases_list = blocked_phrases()
+    for blocked_phrase in blocked_phrases_list:
+        if blocked_phrase in post.title:
+            return None
+    if post.body:
+        for blocked_phrase in blocked_phrases_list:
+            if blocked_phrase in post.body:
+                return None
     if 'attachment' in request_json['object'] and len(request_json['object']['attachment']) > 0 and \
             'type' in request_json['object']['attachment'][0]:
         if request_json['object']['attachment'][0]['type'] == 'Link':
diff --git a/app/admin/forms.py b/app/admin/forms.py
index b0b8ff2c..433f224c 100644
--- a/app/admin/forms.py
+++ b/app/admin/forms.py
@@ -41,6 +41,7 @@ class FederationForm(FlaskForm):
     allowlist = TextAreaField(_l('Allow federation with these instances'))
     use_blocklist = BooleanField(_l('Blocklist instead of allowlist'))
     blocklist = TextAreaField(_l('Deny federation with these instances'))
+    blocked_phrases = TextAreaField(_l('Discard all posts and comments with these phrases (one per line)'))
     submit = SubmitField(_l('Save'))
 
 
diff --git a/app/admin/routes.py b/app/admin/routes.py
index a53d987d..a8068bb4 100644
--- a/app/admin/routes.py
+++ b/app/admin/routes.py
@@ -6,10 +6,10 @@ from flask_login import login_required, current_user
 from flask_babel import _
 from sqlalchemy import text, desc
 
-from app import db, celery
+from app import db, celery, cache
 from app.activitypub.routes import process_inbox_request, process_delete_request
 from app.activitypub.signature import post_request
-from app.activitypub.util import default_context
+from app.activitypub.util import default_context, instance_allowed, instance_blocked
 from app.admin.forms import FederationForm, SiteMiscForm, SiteProfileForm, EditCommunityForm, EditUserForm, \
     EditTopicForm, SendNewsletterForm, AddUserForm
 from app.admin.util import unsubscribe_from_everything_then_delete, unsubscribe_from_community, send_newsletter, \
@@ -18,7 +18,7 @@ from app.community.util import save_icon_file, save_banner_file
 from app.models import AllowedInstances, BannedInstances, ActivityPubLog, utcnow, Site, Community, CommunityMember, \
     User, Instance, File, Report, Topic, UserRegistration, Role, Post
 from app.utils import render_template, permission_required, set_setting, get_setting, gibberish, markdown_to_html, \
-    moderating_communities, joined_communities, finalize_user_setup, theme_list
+    moderating_communities, joined_communities, finalize_user_setup, theme_list, blocked_phrases
 from app.admin import bp
 
 
@@ -123,13 +123,18 @@ def admin_federation():
             for allow in form.allowlist.data.split('\n'):
                 if allow.strip():
                     db.session.add(AllowedInstances(domain=allow.strip()))
+                    cache.delete_memoized(instance_allowed, allow.strip())
         if form.use_blocklist.data:
             set_setting('use_allowlist', False)
             db.session.execute(text('DELETE FROM banned_instances'))
             for banned in form.blocklist.data.split('\n'):
                 if banned.strip():
                     db.session.add(BannedInstances(domain=banned.strip()))
+                    cache.delete_memoized(instance_blocked, banned.strip())
+        site.blocked_phrases = form.blocked_phrases.data
+        cache.delete_memoized(blocked_phrases)
         db.session.commit()
+
         flash(_('Admin settings saved'))
 
     elif request.method == 'GET':
@@ -139,6 +144,7 @@ def admin_federation():
         form.blocklist.data = '\n'.join([instance.domain for instance in instances])
         instances = AllowedInstances.query.all()
         form.allowlist.data = '\n'.join([instance.domain for instance in instances])
+        form.blocked_phrases.data = site.blocked_phrases
 
     return render_template('admin/federation.html', title=_('Federation settings'), form=form,
                            moderating_communities=moderating_communities(current_user.get_id()),
diff --git a/app/community/util.py b/app/community/util.py
index aedb3615..758f0bd3 100644
--- a/app/community/util.py
+++ b/app/community/util.py
@@ -16,7 +16,7 @@ from app.models import Community, File, BannedInstances, PostReply, PostVote, Po
     Instance, Notification, User, ActivityPubLog
 from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \
     html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, \
-    remove_tracking_from_link, ap_datetime, instance_banned
+    remove_tracking_from_link, ap_datetime, instance_banned, blocked_phrases
 from sqlalchemy import func, desc
 import os
 
@@ -299,6 +299,17 @@ def save_post(form, post: Post):
         if current_user.reputation < -100:
             post.score = -1
         post.ranking = post_ranking(post.score, utcnow())
+
+        # Filter by phrase. abort() raises, so no explicit return is needed after it.
+        blocked_phrases_list = blocked_phrases()
+        for blocked_phrase in blocked_phrases_list:
+            if blocked_phrase in post.title:
+                abort(401)
+        if post.body:
+            for blocked_phrase in blocked_phrases_list:
+                if blocked_phrase in post.body:
+                    abort(401)
+
         db.session.add(post)
 
     g.site.last_active = utcnow()
diff --git a/app/models.py b/app/models.py
index 9b2d7a10..5c0c4eb3 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1170,7 +1170,8 @@ class Site(db.Model):
     allow_or_block_list = db.Column(db.Integer, default=2) # 1 = allow list, 2 = block list
     allowlist = db.Column(db.Text, default='')
     blocklist = db.Column(db.Text, default='')
-    auto_decline_referrers = db.Column(db.Text, default='rdrama.net')
+    blocked_phrases = db.Column(db.Text, default='')  # discard incoming content with these phrases
+    auto_decline_referrers = db.Column(db.Text, default='rdrama.net')  # automatically decline registration requests if the referrer is one of these
     created_at = db.Column(db.DateTime, default=utcnow)
     updated = db.Column(db.DateTime, default=utcnow)
     last_active = db.Column(db.DateTime, default=utcnow)
diff --git a/app/post/routes.py b/app/post/routes.py
index f506c68f..14990cbe 100644
--- a/app/post/routes.py
+++ b/app/post/routes.py
@@ -24,7 +24,7 @@ from app.utils import get_setting, render_template, allowlist_html, markdown_to_
     shorten_string, markdown_to_text, gibberish, ap_datetime, return_304, \
     request_etag_matches, ip_address, user_ip_banned, instance_banned, can_downvote, can_upvote, post_ranking, \
     reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, moderating_communities, joined_communities, \
-    blocked_instances, blocked_domains, community_moderators
+    blocked_instances, blocked_domains, community_moderators, blocked_phrases
 
 
 def show_post(post_id: int):
@@ -466,6 +466,10 @@ def add_reply(post_id: int, comment_id: int):
                           body_html=markdown_to_html(form.body.data), body_html_safe=True,
                           from_bot=current_user.bot, nsfw=post.nsfw, nsfl=post.nsfl,
                           notify_author=form.notify_author.data, instance_id=1)
+        if reply.body:
+            for blocked_phrase in blocked_phrases():
+                if blocked_phrase in reply.body:
+                    abort(401)
         db.session.add(reply)
         if in_reply_to.notify_author and current_user.id != in_reply_to.user_id and in_reply_to.author.ap_id is None: # todo: check if replier is blocked
             notification = Notification(title=shorten_string(_('Reply from %(name)s on %(post_title)s',
diff --git a/app/utils.py b/app/utils.py
index 84c462e3..aef7ac30 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -330,6 +330,17 @@ def blocked_instances(user_id) -> List[int]:
     return [block.instance_id for block in blocks]
 
 
+@cache.memoize(timeout=86400)
+def blocked_phrases() -> List[str]:
+    """Return the admin-configured phrases that cause posts and comments to be discarded."""
+    site = Site.query.get(1)
+    if site is None or not site.blocked_phrases:
+        return []
+    # splitlines() rather than split('\n'): browsers submit textarea content with CRLF line endings,
+    # which would otherwise leave a trailing '\r' on every phrase and stop it from ever matching.
+    return [phrase for phrase in site.blocked_phrases.splitlines() if phrase.strip()]
+
+
 def retrieve_block_list():
     try:
         response = requests.get('https://raw.githubusercontent.com/rimu/no-qanon/master/domains.txt', timeout=1)
diff --git a/migrations/versions/2b028a70bd7a_blocked_phrases.py b/migrations/versions/2b028a70bd7a_blocked_phrases.py
new file mode 100644
index 00000000..0f987347
--- /dev/null
+++ b/migrations/versions/2b028a70bd7a_blocked_phrases.py
@@ -0,0 +1,32 @@
+"""blocked phrases
+
+Revision ID: 2b028a70bd7a
+Revises: 12d60b9d5417
+Create Date: 2024-03-22 11:50:15.405786
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '2b028a70bd7a'
+down_revision = '12d60b9d5417'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('site', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('blocked_phrases', sa.Text(), nullable=True))
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('site', schema=None) as batch_op:
+        batch_op.drop_column('blocked_phrases')
+
+    # ### end Alembic commands ###