diff --git a/INSTALL.md b/INSTALL.md index 7543924d..1b403806 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,7 +2,7 @@ * [Setup Database](#setup-database) * [Install Python Libraries](#install-python-libraries) -* [Install redis-server and git](#install-redis-server-and-git) +* [Install additional requirements](#install-additional-requirements) * [Setup pyfedi](#setup-pyfedi) * [Setup .env file](#setup-env-file) * [Initialise Database and Setup Admin account](#initialise-database-and-setup-admin-account) @@ -49,13 +49,14 @@ For installation environments that use 'apt' as a package manager: `sudo apt install python3-pip python3-venv python3-dev python3-psycopg2` -
+
-## Install redis-server and git +## Install additional requirements For installation environments that use 'apt' as a package manager: `sudo apt install redis-server` -`sudo apt install git` +`sudo apt install git` +`sudo apt install tesseract-ocr`
diff --git a/app/activitypub/routes.py b/app/activitypub/routes.py index c051ee51..239b416a 100644 --- a/app/activitypub/routes.py +++ b/app/activitypub/routes.py @@ -722,10 +722,12 @@ def process_inbox_request(request_json, activitypublog_id, ip_address): if user and community: join_request = CommunityJoinRequest.query.filter_by(user_id=user.id, community_id=community.id).first() if join_request: - member = CommunityMember(user_id=user.id, community_id=community.id) - db.session.add(member) - community.subscriptions_count += 1 - db.session.commit() + existing_membership = CommunityMember.query.filter_by(user_id=user.id, community_id=community.id).first() + if not existing_membership: + member = CommunityMember(user_id=user.id, community_id=community.id) + db.session.add(member) + community.subscriptions_count += 1 + db.session.commit() activity_log.result = 'success' cache.delete_memoized(community_membership, user, community) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 67be217a..efb6983c 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -24,7 +24,7 @@ import pytesseract from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ - shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence + shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link def public_key(): @@ -1227,6 +1227,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json post.image = image else: post.type = POST_TYPE_LINK + post.url = remove_tracking_from_link(post.url) domain = domain_from_url(post.url) # notify about links to banned websites. already_notified = set() # often admins and mods are the same people - avoid notifying them twice diff --git a/app/cli.py b/app/cli.py index 685e6887..b093382e 100644 --- a/app/cli.py +++ b/app/cli.py @@ -86,7 +86,7 @@ def register(app): db.session.add(Settings(name='registration_open', value=json.dumps(True))) db.session.add(Settings(name='approve_registrations', value=json.dumps(False))) db.session.add(Settings(name='federation', value=json.dumps(True))) - banned_instances = ['lemmygrad.ml', 'gab.com', 'rqd2.net', 'exploding-heads.com', 'hexbear.net', 'threads.net', 'pieville.net', 'noauthority.social', 'pieville.net', 'links.hackliberty.org'] + banned_instances = ['anonib.al','lemmygrad.ml', 'gab.com', 'rqd2.net', 'exploding-heads.com', 'hexbear.net', 'threads.net', 'pieville.net', 'noauthority.social', 'pieville.net', 'links.hackliberty.org'] for bi in banned_instances: db.session.add(BannedInstances(domain=bi)) print("Added banned instance", bi) diff --git a/app/community/routes.py b/app/community/routes.py index b77cf25b..70ffdbc0 100644 --- a/app/community/routes.py +++ b/app/community/routes.py @@ -327,14 +327,8 @@ def unsubscribe(actor): 'id': undo_id, 'object': follow } - activity = ActivityPubLog(direction='out', activity_id=undo_id, activity_type='Undo', - activity_json=json.dumps(undo), result='processing') - db.session.add(activity) - db.session.commit() success = post_request(community.ap_inbox_url, undo, current_user.private_key, current_user.profile_id() + '#main-key') - activity.result = 'success' - db.session.commit() if not success: flash('There was a problem while trying to unsubscribe', 'error') diff --git a/app/community/util.py b/app/community/util.py index 6a6d54b9..75132ded 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -15,7 +15,7 @@ from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \ Instance, Notification, User from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \ - html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page + html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, remove_tracking_from_link from sqlalchemy import func import os @@ -177,7 +177,7 @@ def save_post(form, post: Post): post.body = form.link_body.data post.body_html = markdown_to_html(post.body) url_changed = post.id is None or form.link_url.data != post.url - post.url = form.link_url.data + post.url = remove_tracking_from_link(form.link_url.data) post.type = POST_TYPE_LINK domain = domain_from_url(form.link_url.data) domain.post_count += 1 diff --git a/app/models.py b/app/models.py index 0bbdf1da..8d4cc870 100644 --- a/app/models.py +++ b/app/models.py @@ -207,6 +207,8 @@ class File(db.Model): os.unlink(self.file_path) if self.thumbnail_path and os.path.isfile(self.thumbnail_path): os.unlink(self.thumbnail_path) + if self.source_url and not self.source_url.startswith('http') and os.path.isfile(self.source_url): + os.unlink(self.source_url) def filesize(self): size = 0 diff --git a/app/user/routes.py b/app/user/routes.py index 091d2338..cbd00cf6 100644 --- a/app/user/routes.py +++ b/app/user/routes.py @@ -25,7 +25,10 @@ import os @bp.route('/people', methods=['GET', 'POST']) @login_required def show_people(): - people = User.query.filter_by(ap_id=None, deleted=False, banned=False).all() + if current_user.is_admin(): + people = User.query.filter_by(ap_id=None, deleted=False, banned=False).all() + else: + people = User.query.filter_by(ap_id=None, deleted=False, banned=False, searchable=True).all() return render_template('user/people.html', people=people, moderating_communities=moderating_communities(current_user.get_id()), joined_communities=joined_communities(current_user.get_id()), title=_('People')) diff --git a/app/utils.py b/app/utils.py index e7ba8f10..93cf3559 100644 --- a/app/utils.py +++ b/app/utils.py @@ -10,7 +10,7 @@ from typing import List, Literal, Union import markdown2 import math -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs, urlencode from functools import wraps import flask from bs4 import BeautifulSoup, NavigableString @@ -163,7 +163,7 @@ def is_image_url(url): def allowlist_html(html: str) -> str: if html is None or html == '': return '' - allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h3', 'h4', 'h5', 'pre', + allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre', 'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead'] # Parse the HTML using BeautifulSoup soup = BeautifulSoup(html, 'html.parser') @@ -260,9 +260,12 @@ def markdown_to_text(markdown_text) -> str: def domain_from_url(url: str, create=True) -> Domain: parsed_url = urlparse(url.lower().replace('www.', '')) if parsed_url and parsed_url.hostname: - domain = Domain.query.filter_by(name=parsed_url.hostname.lower()).first() + find_this = parsed_url.hostname.lower() + if find_this == 'youtu.be': + find_this = 'youtube.com' + domain = Domain.query.filter_by(name=find_this).first() if create and domain is None: - domain = Domain(name=parsed_url.hostname.lower()) + domain = Domain(name=find_this) db.session.add(domain) db.session.commit() return domain @@ -745,3 +748,28 @@ def sha256_digest(input_string): sha256_hash = hashlib.sha256() sha256_hash.update(input_string.encode('utf-8')) return sha256_hash.hexdigest() + + +def remove_tracking_from_link(url): + parsed_url = urlparse(url) + + if parsed_url.netloc == 'youtu.be': + # Extract video ID + video_id = parsed_url.path[1:] # Remove leading slash + + # Preserve 't' parameter if it exists + query_params = parse_qs(parsed_url.query) + if 't' in query_params: + new_query_params = {'t': query_params['t']} + new_query_string = urlencode(new_query_params, doseq=True) + else: + new_query_string = '' + + cleaned_url = f"https://youtu.be/{video_id}" + if new_query_string: + cleaned_url += f"?{new_query_string}" + + return cleaned_url + else: + return url +