mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 19:36:56 -08:00
improve html parsing - make plain links clickable
This commit is contained in:
parent
daf124ae49
commit
438ac72657
3 changed files with 42 additions and 3 deletions
|
@ -693,6 +693,19 @@ fieldset legend {
|
|||
background-color: #777;
|
||||
color: white;
|
||||
margin-bottom: 15px;
|
||||
height: 30px;
|
||||
}
|
||||
.comment .show-more .fe-angles-down, .comment .show-more .fe-angles-up {
|
||||
margin-top: 7px;
|
||||
display: inline-block;
|
||||
}
|
||||
@media (min-width: 1280px) {
|
||||
.comment .show-more {
|
||||
height: 23px;
|
||||
}
|
||||
.comment .show-more .fe-angles-down, .comment .show-more .fe-angles-up {
|
||||
display: inline;
|
||||
}
|
||||
}
|
||||
.comment .comment_author img {
|
||||
width: 20px;
|
||||
|
|
|
@ -362,6 +362,19 @@ nav, etc which are used site-wide */
|
|||
background-color: $dark-grey;
|
||||
color: white;
|
||||
margin-bottom: 15px;
|
||||
height: 30px;
|
||||
|
||||
.fe-angles-down, .fe-angles-up {
|
||||
margin-top: 7px;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
@include breakpoint(laptop) {
|
||||
height: 23px;
|
||||
.fe-angles-down, .fe-angles-up {
|
||||
display: inline;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.comment_author {
|
||||
|
|
19
app/utils.py
19
app/utils.py
|
@ -7,7 +7,6 @@ from typing import List, Literal, Union
|
|||
import markdown2
|
||||
import math
|
||||
from urllib.parse import urlparse
|
||||
import requests
|
||||
from functools import wraps
|
||||
import flask
|
||||
from bs4 import BeautifulSoup
|
||||
|
@ -20,6 +19,7 @@ from sqlalchemy import text
|
|||
from wtforms.fields import SelectField, SelectMultipleField
|
||||
from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
|
||||
from app import db, cache
|
||||
import re
|
||||
|
||||
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
|
||||
Site, Post, PostReply, utcnow
|
||||
|
@ -158,6 +158,16 @@ def allowlist_html(html: str) -> str:
|
|||
# Parse the HTML using BeautifulSoup
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
|
||||
# Find all plain text links, convert to <a> tags
|
||||
plain_text_links = soup.find_all(text=lambda text: re.search(r'https?://\S+', text))
|
||||
for text_link in plain_text_links:
|
||||
# Create a new anchor tag
|
||||
new_anchor = soup.new_tag('a', href=text_link)
|
||||
# Set the anchor's text to be the link itself
|
||||
new_anchor.string = text_link
|
||||
# Replace the plain text link with the new anchor tag
|
||||
text_link.replace_with(new_anchor)
|
||||
|
||||
# Find all tags in the parsed HTML
|
||||
for tag in soup.find_all():
|
||||
# If the tag is not in the allowed_tags list, remove it and its contents
|
||||
|
@ -166,10 +176,13 @@ def allowlist_html(html: str) -> str:
|
|||
else:
|
||||
# Filter and sanitize attributes
|
||||
for attr in list(tag.attrs):
|
||||
if attr not in ['href', 'src', 'alt']: # Add allowed attributes here
|
||||
if attr not in ['href', 'src', 'alt']:
|
||||
del tag[attr]
|
||||
# Add nofollow and target=_blank to anchors
|
||||
if tag.name == 'a':
|
||||
tag.attrs['rel'] = 'nofollow ugc'
|
||||
tag.attrs['target'] = '_blank'
|
||||
|
||||
# Serialize the sanitized tree back to an HTML string (safety relies on the tag/attribute filtering above)
|
||||
return str(soup)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue