From 721bf496189318c1614b93874984698599749b9b Mon Sep 17 00:00:00 2001 From: freamon Date: Thu, 7 Mar 2024 10:45:36 +0000 Subject: [PATCH 01/11] Remove duplicate activitypub logging --- app/community/routes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/app/community/routes.py b/app/community/routes.py index b77cf25b..70ffdbc0 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -327,14 +327,8 @@ def unsubscribe(actor): 'id': undo_id, 'object': follow } - activity = ActivityPubLog(direction='out', activity_id=undo_id, activity_type='Undo', - activity_json=json.dumps(undo), result='processing') - db.session.add(activity) - db.session.commit() success = post_request(community.ap_inbox_url, undo, current_user.private_key, current_user.profile_id() + '#main-key') - activity.result = 'success' - db.session.commit() if not success: flash('There was a problem while trying to unsubscribe', 'error') From 2fd81604d11c2f4d66a2dc572fe4eb19300dfd5f Mon Sep 17 00:00:00 2001 From: freamon Date: Thu, 7 Mar 2024 10:51:50 +0000 Subject: [PATCH 02/11] Update INSTALL.md --- INSTALL.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 7543924d..1b403806 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,7 +2,7 @@ * [Setup Database](#setup-database) * [Install Python Libraries](#install-python-libraries) -* [Install redis-server and git](#install-redis-server-and-git) +* [Install additional requirements](#install-additional-requirements) * [Setup pyfedi](#setup-pyfedi) * [Setup .env file](#setup-env-file) * [Initialise Database and Setup Admin account](#initialise-database-and-setup-admin-account) @@ -49,13 +49,14 @@ For installation environments that use 'apt' as a package manager: `sudo apt install python3-pip python3-venv python3-dev python3-psycopg2` -
+
-## Install redis-server and git +## Install additional requirements For installation environments that use 'apt' as a package manager: `sudo apt install redis-server` -`sudo apt install git` +`sudo apt install git` +`sudo apt install tesseract-ocr`
From 0909256288caed0310447fa4525c0c47ce5654f6 Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 7 Mar 2024 12:39:14 +0100 Subject: [PATCH 03/11] fix string formatting of 'link not allowed' --- app/community/forms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/community/forms.py b/app/community/forms.py index b3713012..e28fce4e 100644 --- a/app/community/forms.py +++ b/app/community/forms.py @@ -81,7 +81,7 @@ class CreatePostForm(FlaskForm): return False domain = domain_from_url(self.link_url.data, create=False) if domain and domain.banned: - self.link_url.errors.append(_(f"Links to %s are not allowed.".format(domain.name))) + self.link_url.errors.append(_("Links to %s are not allowed." % (domain.name))) return False elif self.post_type.data == 'image': if self.image_title.data == '': From 2ba69f2517a2e6349030f9d31f8e70d84a340871 Mon Sep 17 00:00:00 2001 From: rra Date: Thu, 7 Mar 2024 12:42:28 +0100 Subject: [PATCH 04/11] inform that blocklists and interests are being configured in init-db #80 --- app/cli.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/app/cli.py b/app/cli.py index d37646cc..685e6887 100644 --- a/app/cli.py +++ b/app/cli.py @@ -86,16 +86,13 @@ def register(app): db.session.add(Settings(name='registration_open', value=json.dumps(True))) db.session.add(Settings(name='approve_registrations', value=json.dumps(False))) db.session.add(Settings(name='federation', value=json.dumps(True))) - db.session.add(BannedInstances(domain='lemmygrad.ml')) - db.session.add(BannedInstances(domain='gab.com')) - db.session.add(BannedInstances(domain='rqd2.net')) - db.session.add(BannedInstances(domain='exploding-heads.com')) - db.session.add(BannedInstances(domain='hexbear.net')) - db.session.add(BannedInstances(domain='threads.net')) - db.session.add(BannedInstances(domain='pieville.net')) - db.session.add(BannedInstances(domain='noauthority.social')) - db.session.add(BannedInstances(domain='pieville.net')) 
- db.session.add(BannedInstances(domain='links.hackliberty.org')) + banned_instances = ['lemmygrad.ml', 'gab.com', 'rqd2.net', 'exploding-heads.com', 'hexbear.net', 'threads.net', 'pieville.net', 'noauthority.social', 'pieville.net', 'links.hackliberty.org'] + for bi in banned_instances: + db.session.add(BannedInstances(domain=bi)) + print("Added banned instance", bi) + + print("Populating DB with instances and interests") + print("See interests.txt") interests = file_get_contents('interests.txt') db.session.add(Interest(name='🕊 Chilling', communities=parse_communities(interests, 'chilling'))) db.session.add(Interest(name='💭 Interesting stuff', communities=parse_communities(interests, 'interesting stuff'))) @@ -114,12 +111,14 @@ def register(app): if block_list: for domain in block_list.split('\n'): db.session.add(Domain(name=domain.strip(), banned=True)) + print("Added 'No-QAnon' blocklist, see https://github.com/rimu/no-qanon") # Load peertube domain block list block_list = retrieve_peertube_block_list() if block_list: for domain in block_list.split('\n'): db.session.add(Domain(name=domain.strip(), banned=True)) + print("Added 'Peertube Isolation' blocklist, see https://peertube_isolation.frama.io/") # Initial roles anon_role = Role(name='Anonymous user', weight=0) From 3e5d040beb13c24b4e77b11ef0ad6791b6846931 Mon Sep 17 00:00:00 2001 From: freamon Date: Thu, 7 Mar 2024 21:00:11 +0000 Subject: [PATCH 05/11] Add header tags to allowlist --- app/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils.py b/app/utils.py index e7ba8f10..de83ecd0 100644 --- a/app/utils.py +++ b/app/utils.py @@ -163,7 +163,7 @@ def is_image_url(url): def allowlist_html(html: str) -> str: if html is None or html == '': return '' - allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h3', 'h4', 'h5', 'pre', + allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 
'h6', 'pre', 'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead'] # Parse the HTML using BeautifulSoup soup = BeautifulSoup(html, 'html.parser') From 9fa260632ab2233639f484dd002a45ec13bb0df0 Mon Sep 17 00:00:00 2001 From: freamon Date: Fri, 8 Mar 2024 01:33:58 +0000 Subject: [PATCH 06/11] Clean tracking info from youtube links --- app/activitypub/util.py | 3 ++- app/community/util.py | 4 ++-- app/utils.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 67be217a..98a33341 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -24,7 +24,7 @@ import pytesseract from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ - shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence + shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, clean_link def public_key(): @@ -1227,6 +1227,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json post.image = image else: post.type = POST_TYPE_LINK + post.url = clean_link(post.url) domain = domain_from_url(post.url) # notify about links to banned websites. 
already_notified = set() # often admins and mods are the same people - avoid notifying them twice diff --git a/app/community/util.py b/app/community/util.py index 6a6d54b9..98062d32 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -15,7 +15,7 @@ from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \ Instance, Notification, User from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \ - html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page + html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, clean_link from sqlalchemy import func import os @@ -177,7 +177,7 @@ def save_post(form, post: Post): post.body = form.link_body.data post.body_html = markdown_to_html(post.body) url_changed = post.id is None or form.link_url.data != post.url - post.url = form.link_url.data + post.url = clean_link(form.link_url.data) post.type = POST_TYPE_LINK domain = domain_from_url(form.link_url.data) domain.post_count += 1 diff --git a/app/utils.py b/app/utils.py index de83ecd0..26b7a664 100644 --- a/app/utils.py +++ b/app/utils.py @@ -745,3 +745,14 @@ def sha256_digest(input_string): sha256_hash = hashlib.sha256() sha256_hash.update(input_string.encode('utf-8')) return sha256_hash.hexdigest() + + +def clean_link(url): + # strip ?si=abcDEFgh from youtu.be links + clean = re.search(r"(https://youtu.be/\w+)", url) + + if clean is not None: + return clean.group(1) + else: + return url + From 6c4d0d217fa9b864bb10a65db2ef178f9b698af4 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Fri, 8 Mar 2024 21:40:47 +1300 Subject: [PATCH 07/11] minor bugfixes --- app/activitypub/routes.py | 10 ++++++---- app/cli.py | 1 + app/models.py | 2 ++ app/user/routes.py | 
5 ++++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/app/activitypub/routes.py b/app/activitypub/routes.py index c051ee51..239b416a 100644 --- a/app/activitypub/routes.py +++ b/app/activitypub/routes.py @@ -722,10 +722,12 @@ def process_inbox_request(request_json, activitypublog_id, ip_address): if user and community: join_request = CommunityJoinRequest.query.filter_by(user_id=user.id, community_id=community.id).first() if join_request: - member = CommunityMember(user_id=user.id, community_id=community.id) - db.session.add(member) - community.subscriptions_count += 1 - db.session.commit() + existing_membership = CommunityMember.query.filter_by(user_id=user.id, community_id=community.id).first() + if not existing_membership: + member = CommunityMember(user_id=user.id, community_id=community.id) + db.session.add(member) + community.subscriptions_count += 1 + db.session.commit() activity_log.result = 'success' cache.delete_memoized(community_membership, user, community) diff --git a/app/cli.py b/app/cli.py index d37646cc..5847d6b2 100644 --- a/app/cli.py +++ b/app/cli.py @@ -120,6 +120,7 @@ def register(app): if block_list: for domain in block_list.split('\n'): db.session.add(Domain(name=domain.strip(), banned=True)) + db.session.add(Domain(name='anonib.al', banned=True)) # Initial roles anon_role = Role(name='Anonymous user', weight=0) diff --git a/app/models.py b/app/models.py index 0bbdf1da..8d4cc870 100644 --- a/app/models.py +++ b/app/models.py @@ -207,6 +207,8 @@ class File(db.Model): os.unlink(self.file_path) if self.thumbnail_path and os.path.isfile(self.thumbnail_path): os.unlink(self.thumbnail_path) + if self.source_url and not self.source_url.startswith('http') and os.path.isfile(self.source_url): + os.unlink(self.source_url) def filesize(self): size = 0 diff --git a/app/user/routes.py b/app/user/routes.py index 091d2338..cbd00cf6 100644 --- a/app/user/routes.py +++ b/app/user/routes.py @@ -25,7 +25,10 @@ import os @bp.route('/people', 
methods=['GET', 'POST']) @login_required def show_people(): - people = User.query.filter_by(ap_id=None, deleted=False, banned=False).all() + if current_user.is_admin(): + people = User.query.filter_by(ap_id=None, deleted=False, banned=False).all() + else: + people = User.query.filter_by(ap_id=None, deleted=False, banned=False, searchable=True).all() return render_template('user/people.html', people=people, moderating_communities=moderating_communities(current_user.get_id()), joined_communities=joined_communities(current_user.get_id()), title=_('People')) From 6914a47206481b0e47a1732a00322d04f048367c Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Fri, 8 Mar 2024 22:01:46 +1300 Subject: [PATCH 08/11] remove tracking from links fixes #34 --- app/activitypub/util.py | 4 ++-- app/community/util.py | 4 ++-- app/utils.py | 9 ++++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 98a33341..efb6983c 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -24,7 +24,7 @@ import pytesseract from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ - shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, clean_link + shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link def public_key(): @@ -1227,7 +1227,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json post.image = image else: post.type = POST_TYPE_LINK - post.url = clean_link(post.url) + post.url = remove_tracking_from_link(post.url) domain = domain_from_url(post.url) # notify about links to banned websites. 
already_notified = set() # often admins and mods are the same people - avoid notifying them twice diff --git a/app/community/util.py b/app/community/util.py index 98062d32..75132ded 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -15,7 +15,7 @@ from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \ Instance, Notification, User from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \ - html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, clean_link + html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, remove_tracking_from_link from sqlalchemy import func import os @@ -177,7 +177,7 @@ def save_post(form, post: Post): post.body = form.link_body.data post.body_html = markdown_to_html(post.body) url_changed = post.id is None or form.link_url.data != post.url - post.url = clean_link(form.link_url.data) + post.url = remove_tracking_from_link(form.link_url.data) post.type = POST_TYPE_LINK domain = domain_from_url(form.link_url.data) domain.post_count += 1 diff --git a/app/utils.py b/app/utils.py index 26b7a664..c9bfea4a 100644 --- a/app/utils.py +++ b/app/utils.py @@ -260,9 +260,12 @@ def markdown_to_text(markdown_text) -> str: def domain_from_url(url: str, create=True) -> Domain: parsed_url = urlparse(url.lower().replace('www.', '')) if parsed_url and parsed_url.hostname: - domain = Domain.query.filter_by(name=parsed_url.hostname.lower()).first() + find_this = parsed_url.hostname.lower() + if find_this == 'youtu.be': + find_this = 'youtube.com' + domain = Domain.query.filter_by(name=find_this).first() if create and domain is None: - domain = Domain(name=parsed_url.hostname.lower()) + domain = Domain(name=find_this) db.session.add(domain) db.session.commit() 
return domain @@ -747,7 +750,7 @@ def sha256_digest(input_string): return sha256_hash.hexdigest() -def clean_link(url): +def remove_tracking_from_link(url): # strip ?si=abcDEFgh from youtu.be links clean = re.search(r"(https://youtu.be/\w+)", url) From 01423586b555f0eca6a7f868b1f4590aba89d548 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Fri, 8 Mar 2024 22:09:54 +1300 Subject: [PATCH 09/11] refactor remove_tracking_from_link to preserve the t parameter in youtube urls, without regex #34 --- app/utils.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/app/utils.py b/app/utils.py index c9bfea4a..93cf3559 100644 --- a/app/utils.py +++ b/app/utils.py @@ -10,7 +10,7 @@ from typing import List, Literal, Union import markdown2 import math -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs, urlencode from functools import wraps import flask from bs4 import BeautifulSoup, NavigableString @@ -751,11 +751,25 @@ def sha256_digest(input_string): def remove_tracking_from_link(url): - # strip ?si=abcDEFgh from youtu.be links - clean = re.search(r"(https://youtu.be/\w+)", url) + parsed_url = urlparse(url) - if clean is not None: - return clean.group(1) + if parsed_url.netloc == 'youtu.be': + # Extract video ID + video_id = parsed_url.path[1:] # Remove leading slash + + # Preserve 't' parameter if it exists + query_params = parse_qs(parsed_url.query) + if 't' in query_params: + new_query_params = {'t': query_params['t']} + new_query_string = urlencode(new_query_params, doseq=True) + else: + new_query_string = '' + + cleaned_url = f"https://youtu.be/{video_id}" + if new_query_string: + cleaned_url += f"?{new_query_string}" + + return cleaned_url else: return url From a10ed015f27dbaf0aa49a6aae244048927f3b3a8 Mon Sep 17 00:00:00 2001 From: rra Date: Fri, 8 Mar 2024 10:34:17 +0100 Subject: [PATCH 10/11] properly vertically align side cards --- app/templates/index.html | 2 +- 
app/templates/user/people.html | 2 +- app/templates/user/show_profile.html | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/templates/index.html b/app/templates/index.html index 63d3e26a..3d195128 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -42,7 +42,7 @@ --> -
+

{{ _('Active communities') }}

diff --git a/app/templates/user/people.html b/app/templates/user/people.html index 64085d27..c40708f4 100644 --- a/app/templates/user/people.html +++ b/app/templates/user/people.html @@ -35,7 +35,7 @@