pyfedi/app/utils.py

156 lines
5.4 KiB
Python
Raw Normal View History

import random
from urllib.parse import urlparse
import flask
from bs4 import BeautifulSoup
import html as html_module
import requests
import os
2023-09-03 16:30:20 +12:00
from flask import current_app, json
from app import db, cache
from app.models import Settings, Domain, Instance, BannedInstances
# Flask's render_template function, with support for themes added
def render_template(template_name: str, **context) -> str:
    """Render a template, preferring the active theme's copy when a theme is set.

    The 'theme' setting is read via get_setting(); when it is the empty string
    the template is rendered from its normal location, otherwise it is looked
    up under ``themes/<theme>/``.
    """
    active_theme = get_setting('theme', '')
    if active_theme == '':
        # No theme configured: plain Flask rendering.
        return flask.render_template(template_name, **context)
    return flask.render_template(f'themes/{active_theme}/{template_name}', **context)
# Jinja: when a file was modified. Useful for cache-busting
def getmtime(filename):
    """Return the modification time of a file under the ``static/`` directory.

    The path is relative, so it is resolved against the current working
    directory of the process.
    """
    static_path = 'static/' + filename
    return os.path.getmtime(static_path)
# do a GET request to a uri, return the result
def get_request(uri, params=None, headers=None) -> requests.Response:
    """Perform a GET request against ``uri`` and return the response.

    Redirects are followed and the request uses ``timeout=1``.

    Raises:
        requests.exceptions.SSLError: the remote certificate was rejected
            (the failure is logged at info level first).
        requests.exceptions.RequestException: a ValueError was raised while
            making the request; it is re-raised as this more generic error.
    """
    try:
        return requests.get(uri, params=params, headers=headers, timeout=1, allow_redirects=True)
    except requests.exceptions.SSLError as ssl_failure:
        # A broken certificate on the remote end is not our problem; log and propagate.
        current_app.logger.info(f"{uri} {ssl_failure}")
        raise requests.exceptions.SSLError from ssl_failure
    except ValueError as value_failure:
        # Surface as the generic requests error that callers already handle.
        raise requests.exceptions.RequestException(f"InvalidCodepoint: {str(value_failure)}") from None
2023-09-03 16:30:20 +12:00
# retrieves an arbitrary object from the persistent key-value store. cached.
2023-09-17 21:19:51 +12:00
@cache.memoize(timeout=50)
def get_setting(name: str, default=None):
    """Look up a setting by name and return its JSON-decoded value.

    Returns ``default`` when no Settings row with that name exists. Results
    are memoized through the app cache (``timeout=50``).
    """
    row = Settings.query.filter_by(name=name).first()
    return default if row is None else json.loads(row.value)
# saves an arbitrary object into the persistent key-value store
2023-09-03 16:30:20 +12:00
def set_setting(name: str, value):
    """Store ``value`` (JSON-encoded) under ``name``, creating or updating the row.

    The change is committed immediately and the memoized results of
    get_setting() are discarded so later reads see the new value.
    """
    existing = Settings.query.filter_by(name=name).first()
    encoded = json.dumps(value)
    if existing is not None:
        existing.value = encoded
    else:
        db.session.add(Settings(name=name, value=encoded))
    db.session.commit()
    cache.delete_memoized(get_setting)
2023-09-05 20:25:10 +12:00
# Return the contents of a file as a string. Inspired by PHP's function of the same name.
def file_get_contents(filename):
    """Read the whole file at ``filename`` in text mode and return it as a string."""
    with open(filename, 'r') as source:
        return source.read()
# Alphabet that gibberish() draws from: digits, then lower- and upper-case ASCII letters.
random_chars = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'


def gibberish(length: int = 10) -> str:
    """Return ``length`` characters picked at random from ``random_chars``.

    Uses the ``random`` module, so the output is not suitable for secrets.
    """
    return "".join(random.choice(random_chars) for _ in range(length))
def is_image_url(url):
    """Return True when the URL's path ends in a common image file extension.

    Only the path component is examined (case-insensitively), so query
    strings and fragments do not affect the result.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
    lowered_path = urlparse(url).path.lower()
    return lowered_path.endswith(image_suffixes)
# sanitise HTML using an allow list
def allowlist_html(html: str) -> str:
    """Sanitise HTML using an allow list of tags and attributes.

    Tags outside the allow list are removed together with their contents;
    on the remaining tags every attribute except ``href`` and ``src`` is
    dropped. The resulting markup is then HTML-escaped and returned.
    """
    permitted_tags = {'p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h3', 'h4', 'h5'}
    permitted_attrs = {'href', 'src'}  # Add allowed attributes here

    document = BeautifulSoup(html, 'html.parser')
    for element in document.find_all():
        if element.name in permitted_tags:
            # Collect first, then delete, so the attrs dict is not mutated while iterating.
            unwanted = [attr_name for attr_name in element.attrs if attr_name not in permitted_attrs]
            for attr_name in unwanted:
                del element[attr_name]
        else:
            element.extract()

    # NOTE(review): escaping the whole document also escapes the allowed tags,
    # so callers receive them as text rather than markup - confirm this is intended.
    return html_module.escape(str(document))
# convert basic HTML to Markdown
def html_to_markdown(html: str) -> str:
    """Convert basic HTML to Markdown.

    Parses ``html`` with BeautifulSoup's ``html.parser`` and hands the tree
    to html_to_markdown_worker() for the actual conversion.
    """
    return html_to_markdown_worker(BeautifulSoup(html, 'html.parser'))
def html_to_markdown_worker(element, indent_level=0):
    """Recursively convert the children of ``element`` to Markdown text.

    Handled children: text nodes, ``p``, ``br``, ``strong``, ``ul``, ``ol``,
    ``li``, ``blockquote`` and ``code``. Any other tag is skipped together
    with its contents.

    Args:
        element: a parsed node exposing ``contents`` (child nodes) and ``name``.
        indent_level: list nesting depth, used to indent ``li`` and
            ``blockquote`` lines.

    Returns:
        The Markdown rendering of the element's children.
    """
    parts = []
    # Items of an ordered list are numbered; everything else gets a dash.
    ordered = getattr(element, 'name', None) == 'ol'
    item_number = 0
    for item in element.contents:
        if isinstance(item, str):
            parts.append(item)
        elif item.name == 'p':
            # Emit the paragraph's own content, then the blank line that ends it.
            parts.append(html_to_markdown_worker(item, indent_level) + '\n\n')
        elif item.name == 'br':
            parts.append('  \n')  # Two trailing spaces make a Markdown hard line break
        elif item.name == 'strong':
            parts.append('**' + html_to_markdown_worker(item) + '**')
        elif item.name in ('ul', 'ol'):
            parts.append('\n' + html_to_markdown_worker(item, indent_level + 1) + '\n')
        elif item.name == 'li':
            # Every list item needs its own marker, not just the first one.
            item_number += 1
            bullet = f'{item_number}.' if ordered else '-'
            parts.append(' ' * indent_level + bullet + ' ' + html_to_markdown_worker(item).strip() + '\n')
        elif item.name == 'blockquote':
            parts.append(' ' * indent_level + '> ' + html_to_markdown_worker(item).strip() + '\n')
        elif item.name == 'code':
            parts.append('`' + html_to_markdown_worker(item) + '`')
    return ''.join(parts)
def domain_from_url(url: str) -> 'Domain | None':
    """Return the Domain row whose name matches the URL's hostname.

    Returns None when the URL has no hostname (for example a relative or
    scheme-less URL) or when no matching Domain exists.
    """
    hostname = urlparse(url).hostname
    if not hostname:
        # urlparse() yields no hostname for relative/scheme-less input;
        # treat that the same as "no such domain" instead of crashing.
        return None
    return Domain.query.filter_by(name=hostname.lower()).first()
def shorten_string(input_str, max_length=50):
    """Return ``input_str`` limited to ``max_length`` characters.

    Strings that already fit are returned unchanged. Longer strings are cut
    and end in ``...`` so the result is exactly ``max_length`` characters.
    When ``max_length`` is too small to hold the ellipsis, the string is
    simply cut to ``max_length`` characters.
    """
    if len(input_str) <= max_length:
        return input_str
    ellipsis = '...'
    if max_length <= len(ellipsis):
        # No room for the marker; avoid a negative slice index.
        return input_str[:max(max_length, 0)]
    return input_str[:max_length - len(ellipsis)] + ellipsis
def shorten_url(input: str, max_length=20):
    """Return a display form of a URL: scheme removed and length limited.

    ``https://`` and ``http://`` are stripped, then the remainder is passed
    to shorten_string() with ``max_length`` so the caller's limit is honoured.
    """
    without_scheme = input.replace('https://', '').replace('http://', '')
    return shorten_string(without_scheme, max_length)