From 37b06ef43f2f62adb2ddad4d633f4a25a8775f01 Mon Sep 17 00:00:00 2001 From: freamon Date: Tue, 14 May 2024 16:24:05 +0100 Subject: [PATCH 1/6] Deliver Actor 'summary' info as HTML (same as Mastodon and Lemmy) --- app/activitypub/routes.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/app/activitypub/routes.py b/app/activitypub/routes.py index 52f16cbf..49faf088 100644 --- a/app/activitypub/routes.py +++ b/app/activitypub/routes.py @@ -282,12 +282,8 @@ def user_profile(actor): "type": "Image", "url": f"https://{current_app.config['SERVER_NAME']}{user.cover_image()}" } - if user.about: - actor_data['source'] = { - "content": user.about, - "mediaType": "text/markdown" - } - actor_data['summary'] = markdown_to_html(user.about) + if user.about_html: + actor_data['summary'] = user.about_html if user.matrix_user_id: actor_data['matrixUserId'] = user.matrix_user_id resp = jsonify(actor_data) @@ -332,7 +328,6 @@ def community_profile(actor): "type": "Group", "id": f"https://{server}/c/{actor}", "name": community.title, - "summary": community.description, "sensitive": True if community.nsfw or community.nsfl else False, "preferredUsername": actor, "inbox": f"https://{server}/c/{actor}/inbox", @@ -356,6 +351,8 @@ def community_profile(actor): "published": ap_datetime(community.created_at), "updated": ap_datetime(community.last_active), } + if community.description_html: + actor_data["summary"] = community.description_html if community.icon_id is not None: actor_data["icon"] = { "type": "Image", From dc2431fdd115f1c1e2e4427b6b5614882cbec42c Mon Sep 17 00:00:00 2001 From: freamon Date: Tue, 14 May 2024 16:27:24 +0100 Subject: [PATCH 2/6] Only use HTML in Page JSON (for fetched, stickied, or old posts) --- app/activitypub/util.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index bc45048b..e9cc13dd 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -148,10 +148,6 @@ def post_to_activity(post: Post, community: Community): "cc": [], "content": post.body_html if post.body_html else '', "mediaType": "text/html", - "source": { - "content": post.body if post.body else '', - "mediaType": "text/markdown" - }, "attachment": [], "commentsEnabled": post.comments_enabled, "sensitive": post.nsfw or post.nsfl, @@ -200,10 +196,6 @@ def post_to_page(post: Post, community: Community): "cc": [], "content": post.body_html if post.body_html else '', "mediaType": "text/html", - "source": { - "content": post.body if post.body else '', - "mediaType": "text/markdown" - }, "attachment": [], "commentsEnabled": post.comments_enabled, "sensitive": post.nsfw or post.nsfl, From e969a2e8bf7fce29044000a6f3832786bdcdc602 Mon Sep 17 00:00:00 2001 From: freamon Date: Tue, 14 May 2024 16:29:02 +0100 Subject: [PATCH 3/6] Only federate out HTML for posts and replies As per the Decision Log --- app/post/routes.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/app/post/routes.py b/app/post/routes.py index 9c9ffca9..d1fcd499 100644 --- a/app/post/routes.py +++ b/app/post/routes.py @@ -155,10 +155,6 @@ def show_post(post_id: int): 'content': reply.body_html, 'inReplyTo': post.profile_id(), 'mediaType': 'text/html', - 'source': { - 'content': reply.body, - 'mediaType': 'text/markdown' - }, 'published': ap_datetime(utcnow()), 'distinguished': False, 'audience': community.public_url(), @@ -689,10 +685,6 @@ def add_reply(post_id: int, comment_id: int): 'inReplyTo': in_reply_to.profile_id(), 'url': reply.profile_id(), 'mediaType': 'text/html', - 'source': { - 'content': reply.body, - 'mediaType': 'text/markdown' - }, 'published': ap_datetime(utcnow()), 'distinguished': False, 'audience': post.community.public_url(), @@ -1146,10 +1138,6 @@ def federate_post_update(post): 'cc': [], 'content': post.body_html if post.body_html else '', 'mediaType': 'text/html', - 'source': { - 'content': post.body if post.body else '', - 'mediaType': 'text/markdown' - }, 'attachment': [], 'commentsEnabled': post.comments_enabled, 'sensitive': post.nsfw, @@ -1628,10 +1616,6 @@ def post_reply_edit(post_id: int, comment_id: int): 'inReplyTo': in_reply_to.profile_id(), 'url': post_reply.profile_id(), 'mediaType': 'text/html', - 'source': { - 'content': post_reply.body, - 'mediaType': 'text/markdown' - }, 'published': ap_datetime(post_reply.posted_at), 'updated': ap_datetime(post_reply.edited_at), 'distinguished': False, From c77a6de1268db539eaa2ce44e0717fbb13cc32bb Mon Sep 17 00:00:00 2001 From: freamon Date: Tue, 14 May 2024 19:33:08 +0100 Subject: [PATCH 4/6] Render Markdown soft breaks as new lines for new local content #133 --- app/activitypub/routes.py | 4 ++-- app/activitypub/util.py | 26 +++++++++++++------------- app/utils.py | 16 +++++++++++++++- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/app/activitypub/routes.py b/app/activitypub/routes.py index 49faf088..459fd67e 100644 --- a/app/activitypub/routes.py +++ b/app/activitypub/routes.py @@ -25,7 +25,7 @@ from app.activitypub.util import public_key, users_total, active_half_year, acti from app.utils import gibberish, get_setting, is_image_url, allowlist_html, render_template, \ domain_from_url, markdown_to_html, community_membership, ap_datetime, ip_address, can_downvote, \ can_upvote, can_create_post, awaken_dormant_instance, shorten_string, can_create_post_reply, sha256_digest, \ - community_moderators + community_moderators, lemmy_markdown_to_html import werkzeug.exceptions @@ -474,7 +474,7 @@ def process_inbox_request(request_json, activitypublog_id, ip_address): encrypted = request_json['object']['encrypted'] if 'encrypted' in request_json['object'] else None new_message = ChatMessage(sender_id=sender.id, recipient_id=recipient.id, conversation_id=existing_conversation.id, body=request_json['object']['source']['content'], - body_html=allowlist_html(markdown_to_html(request_json['object']['source']['content'])), + body_html=allowlist_html(lemmy_markdown_to_html(request_json['object']['source']['content'])), encrypted=encrypted) db.session.add(new_message) existing_conversation.updated_at = utcnow() diff --git a/app/activitypub/util.py b/app/activitypub/util.py index e9cc13dd..d2d6d109 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -30,7 +30,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \ blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \ - notification_subscribers, communities_banned_from + notification_subscribers, communities_banned_from, lemmy_markdown_to_html def public_key(): @@ -522,7 +522,7 @@ def refresh_community_profile_task(community_id): community.title = activity_json['name'] community.description = activity_json['summary'] if 'summary' in activity_json else '' community.rules = activity_json['rules'] if 'rules' in activity_json else '' - community.rules_html = markdown_to_html(activity_json['rules'] if 'rules' in activity_json else '') + community.rules_html = lemmy_markdown_to_html(activity_json['rules'] if 'rules' in activity_json else '') community.restricted_to_mods = activity_json['postingRestrictedToMods'] community.new_mods_wanted = activity_json['newModsWanted'] if 'newModsWanted' in activity_json else False community.private_mods = activity_json['privateMods'] if 'privateMods' in activity_json else False @@ -533,7 +533,7 @@ def refresh_community_profile_task(community_id): if 'source' in activity_json and \ activity_json['source']['mediaType'] == 'text/markdown': community.description = activity_json['source']['content'] - community.description_html = markdown_to_html(community.description) + community.description_html = lemmy_markdown_to_html(community.description) elif 'content' in activity_json: community.description_html = allowlist_html(activity_json['content']) community.description = '' @@ -663,7 +663,7 @@ def actor_json_to_model(activity_json, address, server): title=activity_json['name'], description=activity_json['summary'] if 'summary' in activity_json else '', rules=activity_json['rules'] if 'rules' in activity_json else '', - rules_html=markdown_to_html(activity_json['rules'] if 'rules' in activity_json else ''), + rules_html=lemmy_markdown_to_html(activity_json['rules'] if 'rules' in activity_json else ''), nsfw=activity_json['sensitive'], restricted_to_mods=activity_json['postingRestrictedToMods'], new_mods_wanted=activity_json['newModsWanted'] if 'newModsWanted' in activity_json else False, @@ -689,7 +689,7 @@ def actor_json_to_model(activity_json, address, server): if 'source' in activity_json and \ activity_json['source']['mediaType'] == 'text/markdown': community.description = activity_json['source']['content'] - community.description_html = markdown_to_html(community.description) + community.description_html = lemmy_markdown_to_html(community.description) elif 'content' in activity_json: community.description_html = allowlist_html(activity_json['content']) community.description = '' @@ -732,7 +732,7 @@ def post_json_to_model(activity_log, post_json, user, community) -> Post: if 'source' in post_json and \ post_json['source']['mediaType'] == 'text/markdown': post.body = post_json['source']['content'] - post.body_html = markdown_to_html(post.body) + post.body_html = lemmy_markdown_to_html(post.body) elif 'content' in post_json: post.body_html = allowlist_html(post_json['content']) post.body = '' @@ -938,7 +938,7 @@ def parse_summary(user_json) -> str: if 'source' in user_json and user_json['source'].get('mediaType') == 'text/markdown': # Convert Markdown to HTML markdown_text = user_json['source']['content'] - html_content = allowlist_html(markdown_to_html(markdown_text)) + html_content = allowlist_html(lemmy_markdown_to_html(markdown_text)) return html_content elif 'summary' in user_json: return allowlist_html(user_json['summary']) @@ -1299,7 +1299,7 @@ def delete_post_or_comment_task(user_ap_id, community_ap_id, to_be_deleted_ap_id to_delete.post.reply_count -= 1 if to_delete.has_replies(): to_delete.body = 'Deleted by author' if to_delete.author.id == deletor.id else 'Deleted by moderator' - to_delete.body_html = markdown_to_html(to_delete.body) + to_delete.body_html = lemmy_markdown_to_html(to_delete.body) else: to_delete.delete_dependencies() db.session.delete(to_delete) @@ -1339,7 +1339,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep 'mediaType' in request_json['object']['source'] and \ request_json['object']['source']['mediaType'] == 'text/markdown': post_reply.body = request_json['object']['source']['content'] - post_reply.body_html = markdown_to_html(post_reply.body) + post_reply.body_html = lemmy_markdown_to_html(post_reply.body) elif 'content' in request_json['object']: # Kbin post_reply.body_html = allowlist_html(request_json['object']['content']) post_reply.body = '' @@ -1362,7 +1362,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep post.body = "🤖 I'm a bot that provides automatic summaries for articles:\n::: spoiler Click here to see the summary\n" + post_reply.body + '\n:::' else: post.body = post_reply.body - post.body_html = allowlist_html(markdown_to_html(post.body) + '\n\nGenerated using AI by: AutoTL;DR') + post.body_html = allowlist_html(lemmy_markdown_to_html(post.body) + '\n\nGenerated using AI by: AutoTL;DR') db.session.commit() return None @@ -1459,7 +1459,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json # Get post content. Lemmy and Kbin put this in different places. if 'source' in request_json['object'] and isinstance(request_json['object']['source'], dict) and request_json['object']['source']['mediaType'] == 'text/markdown': # Lemmy post.body = request_json['object']['source']['content'] - post.body_html = markdown_to_html(post.body) + post.body_html = lemmy_markdown_to_html(post.body) elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin post.body_html = allowlist_html(request_json['object']['content']) post.body = '' @@ -1630,7 +1630,7 @@ def update_post_reply_from_activity(reply: PostReply, request_json: dict): isinstance(request_json['object']['source'], dict) and \ request_json['object']['source']['mediaType'] == 'text/markdown': reply.body = request_json['object']['source']['content'] - reply.body_html = markdown_to_html(reply.body) + reply.body_html = lemmy_markdown_to_html(reply.body) elif 'content' in request_json['object']: reply.body_html = allowlist_html(request_json['object']['content']) reply.body = '' @@ -1654,7 +1654,7 @@ def update_post_from_activity(post: Post, request_json: dict): isinstance(request_json['object']['source'], dict) and \ request_json['object']['source']['mediaType'] == 'text/markdown': post.body = request_json['object']['source']['content'] - post.body_html = markdown_to_html(post.body) + post.body_html = lemmy_markdown_to_html(post.body) elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin post.body_html = allowlist_html(request_json['object']['content']) post.body = '' diff --git a/app/utils.py b/app/utils.py index 23c72222..89a46c0f 100644 --- a/app/utils.py +++ b/app/utils.py @@ -227,10 +227,24 @@ def allowlist_html(html: str) -> str: return re_empty_anchor.sub(r'\1', str(soup)) +# this is for pyfedi's version of Markdown (differs from lemmy for: newlines for soft breaks, ...) def markdown_to_html(markdown_text) -> str: + if markdown_text: + raw_html = markdown2.markdown(markdown_text, safe_mode=True, + extras={'middle-word-em': False, 'tables': True, 'fenced-code-blocks': True, 'strike': True, 'breaks': {'on_newline': True, 'on_backslash': True}}) + # support lemmy's spoiler format + re_spoiler = re.compile(r':{3}\s*?spoiler\s+?(\S.+?)(?:\n|

)(.+?)(?:\n|

):{3}', re.S) + raw_html = re_spoiler.sub(r'

\1

\2

', raw_html) + return allowlist_html(raw_html) + else: + return '' + + +# this is for lemmy's version of Markdown (can be removed in future - when HTML from them filtered through an allow_list is used, instead of MD) +def lemmy_markdown_to_html(markdown_text) -> str: if markdown_text: raw_html = markdown2.markdown(markdown_text, safe_mode=True, extras={'middle-word-em': False, 'tables': True, 'fenced-code-blocks': True, 'strike': True}) - # replace lemmy spoiler tokens with appropriate html tags instead. (until possibly added as extra to markdown2) + # replace lemmy spoiler tokens with appropriate html tags instead. re_spoiler = re.compile(r':{3}\s*?spoiler\s+?(\S.+?)(?:\n|

)(.+?)(?:\n|

):{3}', re.S) raw_html = re_spoiler.sub(r'

\1

\2

', raw_html) return allowlist_html(raw_html) From b0f68483129cfcda94e3fafb491e9b4fe8566ab6 Mon Sep 17 00:00:00 2001 From: freamon Date: Tue, 14 May 2024 20:38:16 +0100 Subject: [PATCH 5/6] Avoid invoking allowlist_html() twice (markdown_to_html() already returns the output from that function) (autotldr username html isn't passed to allowlist_list to avoid getting a target=_blank attribute) --- app/activitypub/routes.py | 2 +- app/activitypub/util.py | 4 ++-- app/chat/util.py | 2 +- app/user/routes.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/activitypub/routes.py b/app/activitypub/routes.py index 459fd67e..04540049 100644 --- a/app/activitypub/routes.py +++ b/app/activitypub/routes.py @@ -474,7 +474,7 @@ def process_inbox_request(request_json, activitypublog_id, ip_address): encrypted = request_json['object']['encrypted'] if 'encrypted' in request_json['object'] else None new_message = ChatMessage(sender_id=sender.id, recipient_id=recipient.id, conversation_id=existing_conversation.id, body=request_json['object']['source']['content'], - body_html=allowlist_html(lemmy_markdown_to_html(request_json['object']['source']['content'])), + body_html=lemmy_markdown_to_html(request_json['object']['source']['content']), encrypted=encrypted) db.session.add(new_message) existing_conversation.updated_at = utcnow() diff --git a/app/activitypub/util.py b/app/activitypub/util.py index d2d6d109..de6c2fc2 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -938,7 +938,7 @@ def parse_summary(user_json) -> str: if 'source' in user_json and user_json['source'].get('mediaType') == 'text/markdown': # Convert Markdown to HTML markdown_text = user_json['source']['content'] - html_content = allowlist_html(lemmy_markdown_to_html(markdown_text)) + html_content = lemmy_markdown_to_html(markdown_text) return html_content elif 'summary' in user_json: return allowlist_html(user_json['summary']) @@ -1362,7 +1362,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep post.body = "🤖 I'm a bot that provides automatic summaries for articles:\n::: spoiler Click here to see the summary\n" + post_reply.body + '\n:::' else: post.body = post_reply.body - post.body_html = allowlist_html(lemmy_markdown_to_html(post.body) + '\n\nGenerated using AI by: AutoTL;DR') + post.body_html = lemmy_markdown_to_html(post.body) + '\n\nGenerated using AI by: AutoTL;DR' db.session.commit() return None diff --git a/app/chat/util.py b/app/chat/util.py index 4f5114b9..d997e181 100644 --- a/app/chat/util.py +++ b/app/chat/util.py @@ -12,7 +12,7 @@ from app.utils import allowlist_html, shorten_string, gibberish, markdown_to_htm def send_message(message: str, conversation_id: int) -> ChatMessage: conversation = Conversation.query.get(conversation_id) reply = ChatMessage(sender_id=current_user.id, conversation_id=conversation.id, - body=message, body_html=allowlist_html(markdown_to_html(message))) + body=message, body_html=markdown_to_html(message)) conversation.updated_at = utcnow() db.session.add(reply) db.session.commit() diff --git a/app/user/routes.py b/app/user/routes.py index dc84fb0c..c1b75d9c 100644 --- a/app/user/routes.py +++ b/app/user/routes.py @@ -108,7 +108,7 @@ def edit_profile(actor): if form.password_field.data.strip() != '': current_user.set_password(form.password_field.data) current_user.about = form.about.data - current_user.about_html = allowlist_html(markdown_to_html(form.about.data)) + current_user.about_html = markdown_to_html(form.about.data) current_user.matrix_user_id = form.matrixuserid.data current_user.bot = form.bot.data profile_file = request.files['profile_file'] From e7c213706d53425f600ca21dc96843330142bd66 Mon Sep 17 00:00:00 2001 From: freamon Date: Tue, 14 May 2024 20:53:05 +0100 Subject: [PATCH 6/6] For deeper replies: also comment out 'Enable markdown editor' link to prevent overlap with language choice --- app/templates/post/add_reply.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/templates/post/add_reply.html b/app/templates/post/add_reply.html index 1aec483e..b8868395 100644 --- a/app/templates/post/add_reply.html +++ b/app/templates/post/add_reply.html @@ -34,7 +34,7 @@ }); {% else %} - + {% endif %} {% endif %}