Render Markdown soft breaks as new lines for new local content #133

2025-01-23 19:36:56 -08:00 · 2024-05-14 19:33:08 +01:00 · 2024-05-14 19:33:08 +01:00 · c77a6de126
commit c77a6de126
parent e969a2e8bf
3 changed files with 30 additions and 16 deletions
--- a/app/activitypub/routes.py
+++ b/app/activitypub/routes.py
@ -25,7 +25,7 @@ from app.activitypub.util import public_key, users_total, active_half_year, acti
 from app.utils import gibberish, get_setting, is_image_url, allowlist_html, render_template, \
    domain_from_url, markdown_to_html, community_membership, ap_datetime, ip_address, can_downvote, \
    can_upvote, can_create_post, awaken_dormant_instance, shorten_string, can_create_post_reply, sha256_digest, \
-    community_moderators
+    community_moderators, lemmy_markdown_to_html
 import werkzeug.exceptions


@ -474,7 +474,7 @@ def process_inbox_request(request_json, activitypublog_id, ip_address):
                                encrypted = request_json['object']['encrypted'] if 'encrypted' in request_json['object'] else None
                                new_message = ChatMessage(sender_id=sender.id, recipient_id=recipient.id, conversation_id=existing_conversation.id,
                                                          body=request_json['object']['source']['content'],
-                                                          body_html=allowlist_html(markdown_to_html(request_json['object']['source']['content'])),
+                                                          body_html=allowlist_html(lemmy_markdown_to_html(request_json['object']['source']['content'])),
                                                          encrypted=encrypted)
                                db.session.add(new_message)
                                existing_conversation.updated_at = utcnow()
--- a/app/activitypub/util.py
+++ b/app/activitypub/util.py
@ -30,7 +30,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar
    is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
    shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
    blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \
-    notification_subscribers, communities_banned_from
+    notification_subscribers, communities_banned_from, lemmy_markdown_to_html


 def public_key():
@ -522,7 +522,7 @@ def refresh_community_profile_task(community_id):
            community.title = activity_json['name']
            community.description = activity_json['summary'] if 'summary' in activity_json else ''
            community.rules = activity_json['rules'] if 'rules' in activity_json else ''
-            community.rules_html = markdown_to_html(activity_json['rules'] if 'rules' in activity_json else '')
+            community.rules_html = lemmy_markdown_to_html(activity_json['rules'] if 'rules' in activity_json else '')
            community.restricted_to_mods = activity_json['postingRestrictedToMods']
            community.new_mods_wanted = activity_json['newModsWanted'] if 'newModsWanted' in activity_json else False
            community.private_mods = activity_json['privateMods'] if 'privateMods' in activity_json else False
@ -533,7 +533,7 @@ def refresh_community_profile_task(community_id):
            if 'source' in activity_json and \
                    activity_json['source']['mediaType'] == 'text/markdown':
                community.description = activity_json['source']['content']
-                community.description_html = markdown_to_html(community.description)
+                community.description_html = lemmy_markdown_to_html(community.description)
            elif 'content' in activity_json:
                community.description_html = allowlist_html(activity_json['content'])
                community.description = ''
@ -663,7 +663,7 @@ def actor_json_to_model(activity_json, address, server):
                              title=activity_json['name'],
                              description=activity_json['summary'] if 'summary' in activity_json else '',
                              rules=activity_json['rules'] if 'rules' in activity_json else '',
-                              rules_html=markdown_to_html(activity_json['rules'] if 'rules' in activity_json else ''),
+                              rules_html=lemmy_markdown_to_html(activity_json['rules'] if 'rules' in activity_json else ''),
                              nsfw=activity_json['sensitive'],
                              restricted_to_mods=activity_json['postingRestrictedToMods'],
                              new_mods_wanted=activity_json['newModsWanted'] if 'newModsWanted' in activity_json else False,
@ -689,7 +689,7 @@ def actor_json_to_model(activity_json, address, server):
        if 'source' in activity_json and \
                activity_json['source']['mediaType'] == 'text/markdown':
            community.description = activity_json['source']['content']
-            community.description_html = markdown_to_html(community.description)
+            community.description_html = lemmy_markdown_to_html(community.description)
        elif 'content' in activity_json:
            community.description_html = allowlist_html(activity_json['content'])
            community.description = ''
@ -732,7 +732,7 @@ def post_json_to_model(activity_log, post_json, user, community) -> Post:
        if 'source' in post_json and \
                post_json['source']['mediaType'] == 'text/markdown':
            post.body = post_json['source']['content']
-            post.body_html = markdown_to_html(post.body)
+            post.body_html = lemmy_markdown_to_html(post.body)
        elif 'content' in post_json:
            post.body_html = allowlist_html(post_json['content'])
            post.body = ''
@ -938,7 +938,7 @@ def parse_summary(user_json) -> str:
    if 'source' in user_json and user_json['source'].get('mediaType') == 'text/markdown':
        # Convert Markdown to HTML
        markdown_text = user_json['source']['content']
-        html_content = allowlist_html(markdown_to_html(markdown_text))
+        html_content = allowlist_html(lemmy_markdown_to_html(markdown_text))
        return html_content
    elif 'summary' in user_json:
        return allowlist_html(user_json['summary'])
@ -1299,7 +1299,7 @@ def delete_post_or_comment_task(user_ap_id, community_ap_id, to_be_deleted_ap_id
                to_delete.post.reply_count -= 1
                if to_delete.has_replies():
                    to_delete.body = 'Deleted by author' if to_delete.author.id == deletor.id else 'Deleted by moderator'
-                    to_delete.body_html = markdown_to_html(to_delete.body)
+                    to_delete.body_html = lemmy_markdown_to_html(to_delete.body)
                else:
                    to_delete.delete_dependencies()
                    db.session.delete(to_delete)
@ -1339,7 +1339,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
                'mediaType' in request_json['object']['source'] and \
                request_json['object']['source']['mediaType'] == 'text/markdown':
            post_reply.body = request_json['object']['source']['content']
-            post_reply.body_html = markdown_to_html(post_reply.body)
+            post_reply.body_html = lemmy_markdown_to_html(post_reply.body)
        elif 'content' in request_json['object']:   # Kbin
            post_reply.body_html = allowlist_html(request_json['object']['content'])
            post_reply.body = ''
@ -1362,7 +1362,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
                    post.body = "🤖 I'm a bot that provides automatic summaries for articles:\n::: spoiler Click here to see the summary\n" + post_reply.body + '\n:::'
                else:
                    post.body = post_reply.body
-                post.body_html = allowlist_html(markdown_to_html(post.body) + '\n\n<small><span class="render_username">Generated using AI by: <a href="/u/autotldr@lemmings.world" title="AutoTL;DR">AutoTL;DR</a></span></small>')
+                post.body_html = allowlist_html(lemmy_markdown_to_html(post.body) + '\n\n<small><span class="render_username">Generated using AI by: <a href="/u/autotldr@lemmings.world" title="AutoTL;DR">AutoTL;DR</a></span></small>')
                db.session.commit()
                return None

@ -1459,7 +1459,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
    # Get post content. Lemmy and Kbin put this in different places.
    if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown': # Lemmy
        post.body = request_json['object']['source']['content']
-        post.body_html = markdown_to_html(post.body)
+        post.body_html = lemmy_markdown_to_html(post.body)
    elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin
        post.body_html = allowlist_html(request_json['object']['content'])
        post.body = ''
@ -1630,7 +1630,7 @@ def update_post_reply_from_activity(reply: PostReply, request_json: dict):
            isinstance(request_json['object']['source'], dict) and \
            request_json['object']['source']['mediaType'] == 'text/markdown':
        reply.body = request_json['object']['source']['content']
-        reply.body_html = markdown_to_html(reply.body)
+        reply.body_html = lemmy_markdown_to_html(reply.body)
    elif 'content' in request_json['object']:
        reply.body_html = allowlist_html(request_json['object']['content'])
        reply.body = ''
@ -1654,7 +1654,7 @@ def update_post_from_activity(post: Post, request_json: dict):
            isinstance(request_json['object']['source'], dict) and \
            request_json['object']['source']['mediaType'] == 'text/markdown':
        post.body = request_json['object']['source']['content']
-        post.body_html = markdown_to_html(post.body)
+        post.body_html = lemmy_markdown_to_html(post.body)
    elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin
        post.body_html = allowlist_html(request_json['object']['content'])
        post.body = ''
--- a/app/utils.py
+++ b/app/utils.py
@ -227,10 +227,24 @@ def allowlist_html(html: str) -> str:
    return re_empty_anchor.sub(r'<a href="\1" rel="nofollow ugc" target="_blank">\1</a>', str(soup))


+# This is for PyFedi's version of Markdown (it differs from Lemmy's in, e.g., rendering soft breaks as new lines)
 def markdown_to_html(markdown_text) -> str:
+    if markdown_text:
+        raw_html = markdown2.markdown(markdown_text, safe_mode=True,
+                    extras={'middle-word-em': False, 'tables': True, 'fenced-code-blocks': True, 'strike': True, 'breaks': {'on_newline': True, 'on_backslash': True}})
+        # support lemmy's spoiler format
+        re_spoiler = re.compile(r':{3}\s*?spoiler\s+?(\S.+?)(?:\n|</p>)(.+?)(?:\n|<p>):{3}', re.S)
+        raw_html = re_spoiler.sub(r'<details><summary>\1</summary><p>\2</p></details>', raw_html)
+        return allowlist_html(raw_html)
+    else:
+        return ''
+
+
+# This is for Lemmy's version of Markdown (it can be removed in future, once the HTML Lemmy sends is filtered through an allow-list and used instead of its Markdown)
+def lemmy_markdown_to_html(markdown_text) -> str:
    if markdown_text:
        raw_html = markdown2.markdown(markdown_text, safe_mode=True, extras={'middle-word-em': False, 'tables': True, 'fenced-code-blocks': True, 'strike': True})
-        # replace lemmy spoiler tokens with appropriate html tags instead. (until possibly added as extra to markdown2)
+        # replace lemmy spoiler tokens with appropriate html tags instead.
        re_spoiler = re.compile(r':{3}\s*?spoiler\s+?(\S.+?)(?:\n|</p>)(.+?)(?:\n|<p>):{3}', re.S)
        raw_html = re_spoiler.sub(r'<details><summary>\1</summary><p>\2</p></details>', raw_html)
        return allowlist_html(raw_html)