mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 19:36:56 -08:00
remove broken html_to_markdown function
This commit is contained in:
parent
46bfa7890b
commit
50db96fb6c
4 changed files with 11 additions and 46 deletions
|
@ -19,7 +19,7 @@ from app.activitypub.util import public_key, users_total, active_half_year, acti
|
||||||
upvote_post, activity_already_ingested, delete_post_or_comment, community_members, \
|
upvote_post, activity_already_ingested, delete_post_or_comment, community_members, \
|
||||||
user_removed_from_remote_server, create_post, create_post_reply, update_post_reply_from_activity, \
|
user_removed_from_remote_server, create_post, create_post_reply, update_post_reply_from_activity, \
|
||||||
update_post_from_activity, undo_vote, undo_downvote
|
update_post_from_activity, undo_vote, undo_downvote
|
||||||
from app.utils import gibberish, get_setting, is_image_url, allowlist_html, html_to_markdown, render_template, \
|
from app.utils import gibberish, get_setting, is_image_url, allowlist_html, render_template, \
|
||||||
domain_from_url, markdown_to_html, community_membership, ap_datetime, markdown_to_text, ip_address, can_downvote, \
|
domain_from_url, markdown_to_html, community_membership, ap_datetime, markdown_to_text, ip_address, can_downvote, \
|
||||||
can_upvote, can_create_post, awaken_dormant_instance, shorten_string, can_create_post_reply, sha256_digest, \
|
can_upvote, can_create_post, awaken_dormant_instance, shorten_string, can_create_post_reply, sha256_digest, \
|
||||||
community_moderators
|
community_moderators
|
||||||
|
|
|
@ -22,7 +22,7 @@ from PIL import Image, ImageOps
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import pytesseract
|
import pytesseract
|
||||||
|
|
||||||
from app.utils import get_request, allowlist_html, html_to_markdown, get_setting, ap_datetime, markdown_to_html, \
|
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
|
||||||
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
|
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
|
||||||
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
|
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
|
||||||
blocked_phrases
|
blocked_phrases
|
||||||
|
@ -434,7 +434,7 @@ def refresh_community_profile_task(community_id):
|
||||||
community.description_html = markdown_to_html(community.description)
|
community.description_html = markdown_to_html(community.description)
|
||||||
elif 'content' in activity_json:
|
elif 'content' in activity_json:
|
||||||
community.description_html = allowlist_html(activity_json['content'])
|
community.description_html = allowlist_html(activity_json['content'])
|
||||||
community.description = html_to_markdown(community.description_html)
|
community.description = ''
|
||||||
|
|
||||||
icon_changed = cover_changed = False
|
icon_changed = cover_changed = False
|
||||||
if 'icon' in activity_json:
|
if 'icon' in activity_json:
|
||||||
|
@ -585,7 +585,7 @@ def actor_json_to_model(activity_json, address, server):
|
||||||
community.description_html = markdown_to_html(community.description)
|
community.description_html = markdown_to_html(community.description)
|
||||||
elif 'content' in activity_json:
|
elif 'content' in activity_json:
|
||||||
community.description_html = allowlist_html(activity_json['content'])
|
community.description_html = allowlist_html(activity_json['content'])
|
||||||
community.description = html_to_markdown(community.description_html)
|
community.description = ''
|
||||||
if 'icon' in activity_json:
|
if 'icon' in activity_json:
|
||||||
icon = File(source_url=activity_json['icon']['url'])
|
icon = File(source_url=activity_json['icon']['url'])
|
||||||
community.icon = icon
|
community.icon = icon
|
||||||
|
@ -625,7 +625,7 @@ def post_json_to_model(activity_log, post_json, user, community) -> Post:
|
||||||
post.body_html = markdown_to_html(post.body)
|
post.body_html = markdown_to_html(post.body)
|
||||||
elif 'content' in post_json:
|
elif 'content' in post_json:
|
||||||
post.body_html = allowlist_html(post_json['content'])
|
post.body_html = allowlist_html(post_json['content'])
|
||||||
post.body = html_to_markdown(post.body_html)
|
post.body = ''
|
||||||
if 'attachment' in post_json and len(post_json['attachment']) > 0 and 'type' in post_json['attachment'][0]:
|
if 'attachment' in post_json and len(post_json['attachment']) > 0 and 'type' in post_json['attachment'][0]:
|
||||||
if post_json['attachment'][0]['type'] == 'Link':
|
if post_json['attachment'][0]['type'] == 'Link':
|
||||||
post.url = post_json['attachment'][0]['href']
|
post.url = post_json['attachment'][0]['href']
|
||||||
|
@ -773,7 +773,7 @@ def parse_summary(user_json) -> str:
|
||||||
if 'source' in user_json and user_json['source'].get('mediaType') == 'text/markdown':
|
if 'source' in user_json and user_json['source'].get('mediaType') == 'text/markdown':
|
||||||
# Convert Markdown to HTML
|
# Convert Markdown to HTML
|
||||||
markdown_text = user_json['source']['content']
|
markdown_text = user_json['source']['content']
|
||||||
html_content = html_to_markdown(markdown_text)
|
html_content = allowlist_html(markdown_to_html(markdown_text))
|
||||||
return html_content
|
return html_content
|
||||||
elif 'summary' in user_json:
|
elif 'summary' in user_json:
|
||||||
return allowlist_html(user_json['summary'])
|
return allowlist_html(user_json['summary'])
|
||||||
|
@ -1184,7 +1184,7 @@ def create_post_reply(activity_log: ActivityPubLog, community: Community, in_rep
|
||||||
post_reply.body_html = markdown_to_html(post_reply.body)
|
post_reply.body_html = markdown_to_html(post_reply.body)
|
||||||
elif 'content' in request_json['object']: # Kbin
|
elif 'content' in request_json['object']: # Kbin
|
||||||
post_reply.body_html = allowlist_html(request_json['object']['content'])
|
post_reply.body_html = allowlist_html(request_json['object']['content'])
|
||||||
post_reply.body = html_to_markdown(post_reply.body_html)
|
post_reply.body = ''
|
||||||
if post_id is not None:
|
if post_id is not None:
|
||||||
# Discard post_reply if it contains certain phrases. Good for stopping spam floods.
|
# Discard post_reply if it contains certain phrases. Good for stopping spam floods.
|
||||||
if post_reply.body:
|
if post_reply.body:
|
||||||
|
@ -1283,7 +1283,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
|
||||||
post.body_html = markdown_to_html(post.body)
|
post.body_html = markdown_to_html(post.body)
|
||||||
elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin
|
elif 'content' in request_json['object'] and request_json['object']['content'] is not None: # Kbin
|
||||||
post.body_html = allowlist_html(request_json['object']['content'])
|
post.body_html = allowlist_html(request_json['object']['content'])
|
||||||
post.body = html_to_markdown(post.body_html)
|
post.body = ''
|
||||||
# Discard post if it contains certain phrases. Good for stopping spam floods.
|
# Discard post if it contains certain phrases. Good for stopping spam floods.
|
||||||
blocked_phrases_list = blocked_phrases()
|
blocked_phrases_list = blocked_phrases()
|
||||||
for blocked_phrase in blocked_phrases_list:
|
for blocked_phrase in blocked_phrases_list:
|
||||||
|
@ -1375,7 +1375,7 @@ def update_post_reply_from_activity(reply: PostReply, request_json: dict):
|
||||||
reply.body_html = markdown_to_html(reply.body)
|
reply.body_html = markdown_to_html(reply.body)
|
||||||
elif 'content' in request_json['object']:
|
elif 'content' in request_json['object']:
|
||||||
reply.body_html = allowlist_html(request_json['object']['content'])
|
reply.body_html = allowlist_html(request_json['object']['content'])
|
||||||
reply.body = html_to_markdown(reply.body_html)
|
reply.body = ''
|
||||||
reply.edited_at = utcnow()
|
reply.edited_at = utcnow()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
@ -1389,7 +1389,7 @@ def update_post_from_activity(post: Post, request_json: dict):
|
||||||
post.body_html = markdown_to_html(post.body)
|
post.body_html = markdown_to_html(post.body)
|
||||||
elif 'content' in request_json['object']:
|
elif 'content' in request_json['object']:
|
||||||
post.body_html = allowlist_html(request_json['object']['content'])
|
post.body_html = allowlist_html(request_json['object']['content'])
|
||||||
post.body = html_to_markdown(post.body_html)
|
post.body = ''
|
||||||
if 'attachment' in request_json['object'] and 'href' in request_json['object']['attachment']:
|
if 'attachment' in request_json['object'] and 'href' in request_json['object']['attachment']:
|
||||||
post.url = request_json['object']['attachment']['href']
|
post.url = request_json['object']['attachment']['href']
|
||||||
if 'sensitive' in request_json['object']:
|
if 'sensitive' in request_json['object']:
|
||||||
|
|
|
@ -15,7 +15,7 @@ from app.constants import POST_TYPE_ARTICLE, POST_TYPE_LINK, POST_TYPE_IMAGE
|
||||||
from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \
|
from app.models import Community, File, BannedInstances, PostReply, PostVote, Post, utcnow, CommunityMember, Site, \
|
||||||
Instance, Notification, User, ActivityPubLog
|
Instance, Notification, User, ActivityPubLog
|
||||||
from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \
|
from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \
|
||||||
html_to_markdown, is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, \
|
is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, \
|
||||||
remove_tracking_from_link, ap_datetime, instance_banned, blocked_phrases
|
remove_tracking_from_link, ap_datetime, instance_banned, blocked_phrases
|
||||||
from sqlalchemy import func, desc
|
from sqlalchemy import func, desc
|
||||||
import os
|
import os
|
||||||
|
|
35
app/utils.py
35
app/utils.py
|
@ -214,41 +214,6 @@ def allowlist_html(html: str) -> str:
|
||||||
return str(soup)
|
return str(soup)
|
||||||
|
|
||||||
|
|
||||||
# convert basic HTML to Markdown
|
|
||||||
def html_to_markdown(html: str) -> str:
|
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
|
||||||
return html_to_markdown_worker(soup)
|
|
||||||
|
|
||||||
|
|
||||||
def html_to_markdown_worker(element, indent_level=0):
|
|
||||||
formatted_text = ''
|
|
||||||
for item in element.contents:
|
|
||||||
if isinstance(item, str):
|
|
||||||
formatted_text += item
|
|
||||||
elif item.name == 'p':
|
|
||||||
formatted_text += '\n\n'
|
|
||||||
elif item.name == 'br':
|
|
||||||
formatted_text += ' \n' # Double space at the end for line break
|
|
||||||
elif item.name == 'strong':
|
|
||||||
formatted_text += '**' + html_to_markdown_worker(item) + '**'
|
|
||||||
elif item.name == 'ul':
|
|
||||||
formatted_text += '\n'
|
|
||||||
formatted_text += html_to_markdown_worker(item, indent_level + 1)
|
|
||||||
formatted_text += '\n'
|
|
||||||
elif item.name == 'ol':
|
|
||||||
formatted_text += '\n'
|
|
||||||
formatted_text += html_to_markdown_worker(item, indent_level + 1)
|
|
||||||
formatted_text += '\n'
|
|
||||||
elif item.name == 'li':
|
|
||||||
bullet = '-' if item.find_parent(['ul', 'ol']) and item.find_previous_sibling() is None else ''
|
|
||||||
formatted_text += ' ' * indent_level + bullet + ' ' + html_to_markdown_worker(item).strip() + '\n'
|
|
||||||
elif item.name == 'blockquote':
|
|
||||||
formatted_text += ' ' * indent_level + '> ' + html_to_markdown_worker(item).strip() + '\n'
|
|
||||||
elif item.name == 'code':
|
|
||||||
formatted_text += '`' + html_to_markdown_worker(item) + '`'
|
|
||||||
return formatted_text
|
|
||||||
|
|
||||||
|
|
||||||
def markdown_to_html(markdown_text) -> str:
|
def markdown_to_html(markdown_text) -> str:
|
||||||
if markdown_text:
|
if markdown_text:
|
||||||
return allowlist_html(markdown2.markdown(markdown_text, safe_mode=True, extras={'middle-word-em': False, 'tables': True, 'fenced-code-blocks': True, 'strike': True}))
|
return allowlist_html(markdown2.markdown(markdown_text, safe_mode=True, extras={'middle-word-em': False, 'tables': True, 'fenced-code-blocks': True, 'strike': True}))
|
||||||
|
|
Loading…
Add table
Reference in a new issue