From 721bf496189318c1614b93874984698599749b9b Mon Sep 17 00:00:00 2001 From: freamon Date: Thu, 7 Mar 2024 10:45:36 +0000 Subject: [PATCH 1/4] Remove duplicate activitypub logging --- app/community/routes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/app/community/routes.py b/app/community/routes.py index b77cf25b..70ffdbc0 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -327,14 +327,8 @@ def unsubscribe(actor): 'id': undo_id, 'object': follow } - activity = ActivityPubLog(direction='out', activity_id=undo_id, activity_type='Undo', - activity_json=json.dumps(undo), result='processing') - db.session.add(activity) - db.session.commit() success = post_request(community.ap_inbox_url, undo, current_user.private_key, current_user.profile_id() + '#main-key') - activity.result = 'success' - db.session.commit() if not success: flash('There was a problem while trying to unsubscribe', 'error') From 2fd81604d11c2f4d66a2dc572fe4eb19300dfd5f Mon Sep 17 00:00:00 2001 From: freamon Date: Thu, 7 Mar 2024 10:51:50 +0000 Subject: [PATCH 2/4] Update INSTALL.md --- INSTALL.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 7543924d..1b403806 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,7 +2,7 @@ * [Setup Database](#setup-database) * [Install Python Libraries](#install-python-libraries) -* [Install redis-server and git](#install-redis-server-and-git) +* [Install additional requirements](#install-additional-requirements) * [Setup pyfedi](#setup-pyfedi) * [Setup .env file](#setup-env-file) * [Initialise Database and Setup Admin account](#initialise-database-and-setup-admin-account) @@ -49,13 +49,14 @@ For installation environments that use 'apt' as a package manager: `sudo apt install python3-pip python3-venv python3-dev python3-psycopg2` -
+
-## Install redis-server and git +## Install additional requirements For installation environments that use 'apt' as a package manager: `sudo apt install redis-server` -`sudo apt install git` +`sudo apt install git` +`sudo apt install tesseract-ocr`
From 3e5d040beb13c24b4e77b11ef0ad6791b6846931 Mon Sep 17 00:00:00 2001 From: freamon Date: Thu, 7 Mar 2024 21:00:11 +0000 Subject: [PATCH 3/4] Add header tags to allowlist --- app/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils.py b/app/utils.py index e7ba8f10..de83ecd0 100644 --- a/app/utils.py +++ b/app/utils.py @@ -163,7 +163,7 @@ def is_image_url(url): def allowlist_html(html: str) -> str: if html is None or html == '': return '' - allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h3', 'h4', 'h5', 'pre', + allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre', 'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead'] # Parse the HTML using BeautifulSoup soup = BeautifulSoup(html, 'html.parser') From 9fa260632ab2233639f484dd002a45ec13bb0df0 Mon Sep 17 00:00:00 2001 From: freamon Date: Fri, 8 Mar 2024 01:33:58 +0000 Subject: [PATCH 4/4] Clean tracking info from youtube links --- app/activitypub/util.py | 3 ++- app/community/util.py | 4 ++-- app/utils.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 67be217a..98a33341 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -24,7 +24,7 @@ import pytesseract from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ - shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence + shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, clean_link def public_key(): @@ -1227,6 +1227,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json post.image = image else: post.type = POST_TYPE_LINK + post.url = clean_link(post.url) domain = domain_from_url(post.url) # notify about links to banned websites. already_notified = set() # often admins and mods are the same people - avoid notifying them twice diff --git a/app/community/util.py b/app/community/util.py index 6a6d54b9..98062d32 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -15,7 +15,7 @@ from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \ Instance, Notification, User from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \ - html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page + html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, clean_link from sqlalchemy import func import os @@ -177,7 +177,7 @@ def save_post(form, post: Post): post.body = form.link_body.data post.body_html = markdown_to_html(post.body) url_changed = post.id is None or form.link_url.data != post.url - post.url = form.link_url.data + post.url = clean_link(form.link_url.data) post.type = POST_TYPE_LINK domain = domain_from_url(form.link_url.data) domain.post_count += 1 diff --git a/app/utils.py b/app/utils.py index de83ecd0..26b7a664 100644 --- a/app/utils.py +++ b/app/utils.py @@ -745,3 +745,14 @@ def sha256_digest(input_string): sha256_hash = hashlib.sha256() sha256_hash.update(input_string.encode('utf-8')) return sha256_hash.hexdigest() + + +def clean_link(url): + # strip ?si=abcDEFgh from youtu.be links + clean = re.search(r"(https://youtu.be/\w+)", url) + + if clean is not None: + return clean.group(1) + else: + return url +