From 438ac72657cd08d26be8fb25b066fb42a662df9a Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Wed, 10 Jan 2024 19:54:54 +1300 Subject: [PATCH] improve html parsing - make plain links clickable --- app/static/structure.css | 13 +++++++++++++ app/static/structure.scss | 13 +++++++++++++ app/utils.py | 19 ++++++++++++++++--- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/app/static/structure.css b/app/static/structure.css index 867d5afd..979e1fda 100644 --- a/app/static/structure.css +++ b/app/static/structure.css @@ -693,6 +693,19 @@ fieldset legend { background-color: #777; color: white; margin-bottom: 15px; + height: 30px; +} +.comment .show-more .fe-angles-down, .comment .show-more .fe-angles-up { + margin-top: 7px; + display: inline-block; +} +@media (min-width: 1280px) { + .comment .show-more { + height: 23px; + } + .comment .show-more .fe-angles-down, .comment .show-more .fe-angles-up { + display: inline; + } } .comment .comment_author img { width: 20px; diff --git a/app/static/structure.scss b/app/static/structure.scss index 5b2e99ec..ff1b9c22 100644 --- a/app/static/structure.scss +++ b/app/static/structure.scss @@ -362,6 +362,19 @@ nav, etc which are used site-wide */ background-color: $dark-grey; color: white; margin-bottom: 15px; + height: 30px; + + .fe-angles-down, .fe-angles-up { + margin-top: 7px; + display: inline-block; + } + + @include breakpoint(laptop) { + height: 23px; + .fe-angles-down, .fe-angles-up { + display: inline; + } + } } .comment_author { diff --git a/app/utils.py b/app/utils.py index b5165dd9..e6d4b4d1 100644 --- a/app/utils.py +++ b/app/utils.py @@ -7,7 +7,6 @@ from typing import List, Literal, Union import markdown2 import math from urllib.parse import urlparse -import requests from functools import wraps import flask from bs4 import BeautifulSoup @@ -20,6 +19,7 @@ from sqlalchemy import text from wtforms.fields import SelectField, SelectMultipleField from wtforms.widgets import 
Select, html_params, ListWidget, CheckboxInput from app import db, cache +import re from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \ Site, Post, PostReply, utcnow @@ -158,6 +158,16 @@ def allowlist_html(html: str) -> str: # Parse the HTML using BeautifulSoup soup = BeautifulSoup(html, 'html.parser') + # Find all plain text links and convert them to anchor tags + plain_text_links = soup.find_all(text=lambda text: re.search(r'https?://\S+', text)) + for text_link in plain_text_links: + # Create a new anchor tag + new_anchor = soup.new_tag('a', href=text_link) + # Set the anchor's text to be the link itself + new_anchor.string = text_link + # Replace the plain text link with the new anchor tag + text_link.replace_with(new_anchor) + # Find all tags in the parsed HTML for tag in soup.find_all(): # If the tag is not in the allowed_tags list, remove it and its contents @@ -166,10 +176,13 @@ def allowlist_html(html: str) -> str: else: # Filter and sanitize attributes for attr in list(tag.attrs): - if attr not in ['href', 'src', 'alt']: # Add allowed attributes here + if attr not in ['href', 'src', 'alt']: del tag[attr] + # Add nofollow and target=_blank to anchors + if tag.name == 'a': + tag.attrs['rel'] = 'nofollow ugc' + tag.attrs['target'] = '_blank' - # Encode the HTML to prevent script execution return str(soup)