From 7e77c0728da38c3c1f78129dfae563b552436e01 Mon Sep 17 00:00:00 2001 From: freamon Date: Wed, 25 Sep 2024 01:09:02 +0000 Subject: [PATCH] Revert to always preferring incoming Markdown over HTML Due to issues with spoiler format for Lemmy and Fediverse links for MBIN (#327) --- app/activitypub/util.py | 27 +++++++++------------------ app/utils.py | 5 +++-- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index e77c81bc..f55f2b3b 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -487,8 +487,7 @@ def refresh_user_profile_task(user_id): user.about_html = '' if 'source' in activity_json and activity_json['source'].get('mediaType') == 'text/markdown': user.about = activity_json['source']['content'] - if '::: spoiler' in user.about: - user.about_html = markdown_to_html(user.about) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + user.about_html = markdown_to_html(user.about) # prefer Markdown if provided, overwrite version obtained from HTML else: user.about = html_to_text(user.about_html) user.ap_fetched_at = utcnow() @@ -585,8 +584,7 @@ def refresh_community_profile_task(community_id): community.description_html = allowlist_html(description_html) if 'source' in activity_json and activity_json['source'].get('mediaType') == 'text/markdown': community.description = activity_json['source']['content'] - if '::: spoiler' in community.description: - community.description_html = markdown_to_html(community.description) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + community.description_html = markdown_to_html(community.description) # prefer Markdown if provided, overwrite version obtained from HTML else: community.description = html_to_text(community.description_html) @@ -712,8 +710,7 @@ def actor_json_to_model(activity_json, address, server): user.about_html = '' if 'source' in activity_json and activity_json['source'].get('mediaType') 
== 'text/markdown': user.about = activity_json['source']['content'] - if '::: spoiler' in user.about: - user.about_html = markdown_to_html(user.about) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + user.about_html = markdown_to_html(user.about) # prefer Markdown if provided, overwrite version obtained from HTML else: user.about = html_to_text(user.about_html) @@ -794,8 +791,7 @@ def actor_json_to_model(activity_json, address, server): community.description_html = allowlist_html(description_html) if 'source' in activity_json and activity_json['source'].get('mediaType') == 'text/markdown': community.description = activity_json['source']['content'] - if '::: spoiler' in community.description: - community.description_html = markdown_to_html(community.description) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + community.description_html = markdown_to_html(community.description) # prefer Markdown if provided, overwrite version obtained from HTML else: community.description = html_to_text(community.description_html) @@ -860,8 +856,7 @@ def post_json_to_model(activity_log, post_json, user, community) -> Post: post.body_html = allowlist_html(post_json['content']) if 'source' in post_json and post_json['source']['mediaType'] == 'text/markdown': post.body = post_json['source']['content'] - if '::: spoiler' in post.body: - post.body_html = markdown_to_html(post.body) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML else: post.body = html_to_text(post.body_html) elif post_json['mediaType'] == 'text/markdown': @@ -1593,8 +1588,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and \ 'mediaType' in request_json['object']['source'] and 
request_json['object']['source']['mediaType'] == 'text/markdown': post_reply.body = request_json['object']['source']['content'] - if '::: spoiler' in post_reply.body: - post_reply.body_html = markdown_to_html(post_reply.body) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + post_reply.body_html = markdown_to_html(post_reply.body) # prefer Markdown if provided, overwrite version obtained from HTML else: post_reply.body = html_to_text(post_reply.body_html) # Language - Lemmy uses 'language' while Mastodon uses 'contentMap' @@ -1713,8 +1707,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json post.body_html = allowlist_html(request_json['object']['content']) if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown': post.body = request_json['object']['source']['content'] - if '::: spoiler' in post.body: - post.body_html = markdown_to_html(post.body) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML else: post.body = html_to_text(post.body_html) elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown': @@ -1959,8 +1952,7 @@ def update_post_reply_from_activity(reply: PostReply, request_json: dict): if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and \ 'mediaType' in request_json['object']['source'] and request_json['object']['source']['mediaType'] == 'text/markdown': reply.body = request_json['object']['source']['content'] - if '::: spoiler' in reply.body: - reply.body_html = markdown_to_html(reply.body) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + reply.body_html = markdown_to_html(reply.body) # prefer Markdown if provided, overwrite version obtained 
from HTML else: reply.body = html_to_text(reply.body_html) # Language @@ -1984,8 +1976,7 @@ def update_post_from_activity(post: Post, request_json: dict): post.body_html = allowlist_html(request_json['object']['content']) if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown': post.body = request_json['object']['source']['content'] - if '::: spoiler' in post.body: - post.body_html = markdown_to_html(post.body) # overwrite as Lemmy doesn't convert spoiler contents into HTML very well + post.body_html = markdown_to_html(post.body) # prefer Markdown if provided, overwrite version obtained from HTML else: post.body = html_to_text(post.body_html) elif 'mediaType' in request_json['object'] and request_json['object']['mediaType'] == 'text/markdown': diff --git a/app/utils.py b/app/utils.py index b05b653f..f36057e0 100644 --- a/app/utils.py +++ b/app/utils.py @@ -323,7 +323,8 @@ def allowlist_html(html: str, a_target='_blank') -> str: return clean_html -# this is for pyfedi's version of Markdown (differs from lemmy for: newlines for soft breaks, ...) +# use this for Markdown irrespective of origin, as it can deal with both soft break newlines ('\n' used by PieFed) and hard break newlines (' \n' or ' \\n') +# ' \\n' will create

<br /><br /> instead of just <br />
, but hopefully that's acceptable. def markdown_to_html(markdown_text, anchors_new_tab=True) -> str: if markdown_text: raw_html = markdown2.markdown(markdown_text, @@ -336,7 +337,7 @@ def markdown_to_html(markdown_text, anchors_new_tab=True) -> str: # this function lets local users use the more intuitive soft-breaks for newlines, but actually stores the Markdown in Lemmy-compatible format # Reasons for this: # 1. it's what any adapted Lemmy apps using an API would expect -# 2. we need to revert to sending out Markdown in 'source' because: +# 2. we've reverted to sending out Markdown in 'source' because: # a. Lemmy doesn't convert '
<details><summary>' back into its '::: spoiler' format # b. anything coming from another PieFed instance would get reduced with html_to_text() # c. raw 'https' strings in code blocks are being converted into links for HTML that Lemmy then converts back into []()