Merge remote-tracking branch 'origin/main'

This commit is contained in:
rimu 2024-03-08 21:52:17 +13:00
commit 47bf0ddedb
5 changed files with 21 additions and 14 deletions

View file

@ -2,7 +2,7 @@
* [Setup Database](#setup-database)
* [Install Python Libraries](#install-python-libraries)
* [Install redis-server and git](#install-redis-server-and-git)
* [Install additional requirements](#install-additional-requirements)
* [Setup pyfedi](#setup-pyfedi)
* [Setup .env file](#setup-env-file)
* [Initialise Database and Setup Admin account](#initialise-database-and-setup-admin-account)
@ -49,13 +49,14 @@ For installation environments that use 'apt' as a package manager:
`sudo apt install python3-pip python3-venv python3-dev python3-psycopg2`
<div id="install-redis-server-and-git"></div>
<div id="install-additional-requirements"></div>
## Install redis-server and git
## Install additional requirements
For installation environments that use 'apt' as a package manager:
`sudo apt install redis-server`
`sudo apt install git`
`sudo apt install tesseract-ocr`
<div id="setup-pyfedi"></div>

View file

@ -24,7 +24,7 @@ import pytesseract
from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, clean_link
def public_key():
@ -1227,6 +1227,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
post.image = image
else:
post.type = POST_TYPE_LINK
post.url = clean_link(post.url)
domain = domain_from_url(post.url)
# notify about links to banned websites.
already_notified = set() # often admins and mods are the same people - avoid notifying them twice

View file

@ -327,14 +327,8 @@ def unsubscribe(actor):
'id': undo_id,
'object': follow
}
activity = ActivityPubLog(direction='out', activity_id=undo_id, activity_type='Undo',
activity_json=json.dumps(undo), result='processing')
db.session.add(activity)
db.session.commit()
success = post_request(community.ap_inbox_url, undo, current_user.private_key,
current_user.profile_id() + '#main-key')
activity.result = 'success'
db.session.commit()
if not success:
flash('There was a problem while trying to unsubscribe', 'error')

View file

@ -15,7 +15,7 @@ from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE
from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \
Instance, Notification, User
from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \
html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page
html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, clean_link
from sqlalchemy import func
import os
@ -177,7 +177,7 @@ def save_post(form, post: Post):
post.body = form.link_body.data
post.body_html = markdown_to_html(post.body)
url_changed = post.id is None or form.link_url.data != post.url
post.url = form.link_url.data
post.url = clean_link(form.link_url.data)
post.type = POST_TYPE_LINK
domain = domain_from_url(form.link_url.data)
domain.post_count += 1

View file

@ -163,7 +163,7 @@ def is_image_url(url):
def allowlist_html(html: str) -> str:
if html is None or html == '':
return ''
allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h3', 'h4', 'h5', 'pre',
allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre',
'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead']
# Parse the HTML using BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')
@ -745,3 +745,14 @@ def sha256_digest(input_string):
sha256_hash = hashlib.sha256()
sha256_hash.update(input_string.encode('utf-8'))
return sha256_hash.hexdigest()
def clean_link(url):
    """Strip tracking query parameters (e.g. ``?si=abcDEFgh``) from youtu.be share links.

    Returns the bare ``https://youtu.be/<video-id>`` URL when *url* is a
    youtu.be link; any other URL is returned unchanged.
    """
    # YouTube video IDs draw from [A-Za-z0-9_-]; '\w' alone misses '-' and
    # would truncate IDs containing a hyphen. The '.' in the host is escaped
    # so the pattern cannot match look-alike hosts such as 'youtuXbe'.
    match = re.search(r"(https://youtu\.be/[\w-]+)", url)
    if match is not None:
        return match.group(1)
    else:
        return url