2023-12-22 15:34:45 +13:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2024-02-25 16:24:50 +13:00
|
|
|
import hashlib
|
2024-02-28 12:55:30 +13:00
|
|
|
import mimetypes
|
2023-09-09 20:46:40 +12:00
|
|
|
import random
|
2024-02-21 08:36:00 +13:00
|
|
|
import urllib
|
2024-01-11 20:39:22 +13:00
|
|
|
from collections import defaultdict
|
|
|
|
from datetime import datetime, timedelta, date
|
2024-01-02 19:41:00 +13:00
|
|
|
from typing import List, Literal, Union
|
2023-12-08 17:13:38 +13:00
|
|
|
|
2023-10-10 22:25:37 +13:00
|
|
|
import markdown2
|
|
|
|
import math
|
2024-03-08 22:09:54 +13:00
|
|
|
from urllib.parse import urlparse, parse_qs, urlencode
|
2023-10-23 13:03:35 +13:00
|
|
|
from functools import wraps
|
2023-09-16 19:09:04 +12:00
|
|
|
import flask
|
2024-01-11 08:21:33 +13:00
|
|
|
from bs4 import BeautifulSoup, NavigableString
|
2023-08-29 22:01:06 +12:00
|
|
|
import requests
|
|
|
|
import os
|
2024-01-02 19:41:00 +13:00
|
|
|
from flask import current_app, json, redirect, url_for, request, make_response, Response, g
|
2023-10-21 15:49:01 +13:00
|
|
|
from flask_login import current_user
|
2024-01-11 20:39:22 +13:00
|
|
|
from sqlalchemy import text, or_
|
2023-12-13 21:04:11 +13:00
|
|
|
from wtforms.fields import SelectField, SelectMultipleField
|
|
|
|
from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
|
2023-09-16 19:09:04 +12:00
|
|
|
from app import db, cache
|
2024-01-10 19:54:54 +13:00
|
|
|
import re
|
2023-12-22 15:34:45 +13:00
|
|
|
|
2024-02-02 15:30:03 +13:00
|
|
|
from app.email import send_welcome_email
|
2024-01-02 19:41:00 +13:00
|
|
|
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
|
2024-01-12 12:34:08 +13:00
|
|
|
Site, Post, PostReply, utcnow, Filter, CommunityMember
|
2023-09-16 19:09:04 +12:00
|
|
|
|
|
|
|
|
|
|
|
# Flask's render_template function, with support for themes added
|
2023-12-10 15:10:09 +13:00
|
|
|
def render_template(template_name: str, **context) -> Response:
|
2024-02-07 17:31:12 +13:00
|
|
|
theme = current_theme()
|
|
|
|
if theme != '' and os.path.exists(f'app/templates/themes/{theme}/{template_name}'):
|
2023-12-10 15:10:09 +13:00
|
|
|
content = flask.render_template(f'themes/{theme}/{template_name}', **context)
|
2023-09-16 19:09:04 +12:00
|
|
|
else:
|
2023-12-10 15:10:09 +13:00
|
|
|
content = flask.render_template(template_name, **context)
|
|
|
|
|
|
|
|
# Browser caching using ETags and Cache-Control
|
|
|
|
resp = make_response(content)
|
2023-12-11 20:46:38 +13:00
|
|
|
if current_user.is_anonymous:
|
|
|
|
if 'etag' in context:
|
|
|
|
resp.headers.add_header('ETag', context['etag'])
|
|
|
|
resp.headers.add_header('Cache-Control', 'no-cache, max-age=600, must-revalidate')
|
2023-12-10 15:10:09 +13:00
|
|
|
return resp
|
|
|
|
|
|
|
|
|
|
|
|
def request_etag_matches(etag):
|
|
|
|
if 'If-None-Match' in request.headers:
|
|
|
|
old_etag = request.headers['If-None-Match']
|
|
|
|
return old_etag == etag
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
2023-12-12 18:28:49 +13:00
|
|
|
def return_304(etag, content_type=None):
|
2023-12-10 15:10:09 +13:00
|
|
|
resp = make_response('', 304)
|
|
|
|
resp.headers.add_header('ETag', request.headers['If-None-Match'])
|
|
|
|
resp.headers.add_header('Cache-Control', 'no-cache, max-age=600, must-revalidate')
|
2024-03-02 13:56:47 +13:00
|
|
|
resp.headers.add_header('Vary', 'Accept, Cookie')
|
2023-12-12 18:28:49 +13:00
|
|
|
if content_type:
|
|
|
|
resp.headers.set('Content-Type', content_type)
|
2023-12-10 15:10:09 +13:00
|
|
|
return resp
|
2023-08-29 22:01:06 +12:00
|
|
|
|
|
|
|
|
|
|
|
# Jinja: when a file was modified. Useful for cache-busting
|
|
|
|
def getmtime(filename):
|
2024-02-13 21:28:33 +13:00
|
|
|
if os.path.exists('static/' + filename):
|
|
|
|
return os.path.getmtime('static/' + filename)
|
2023-08-29 22:01:06 +12:00
|
|
|
|
|
|
|
|
|
|
|
# do a GET request to a uri, return the result
|
|
|
|
def get_request(uri, params=None, headers=None) -> requests.Response:
|
2023-11-16 22:31:14 +13:00
|
|
|
if headers is None:
|
|
|
|
headers = {'User-Agent': 'PieFed/1.0'}
|
|
|
|
else:
|
|
|
|
headers.update({'User-Agent': 'PieFed/1.0'})
|
2024-02-21 08:36:00 +13:00
|
|
|
if params and '/webfinger' in uri:
|
|
|
|
payload_str = urllib.parse.urlencode(params, safe=':@')
|
|
|
|
else:
|
|
|
|
payload_str = urllib.parse.urlencode(params) if params else None
|
2023-08-29 22:01:06 +12:00
|
|
|
try:
|
2024-02-21 08:36:00 +13:00
|
|
|
response = requests.get(uri, params=payload_str, headers=headers, timeout=5, allow_redirects=True)
|
2023-08-29 22:01:06 +12:00
|
|
|
except requests.exceptions.SSLError as invalid_cert:
|
|
|
|
# Not our problem if the other end doesn't have proper SSL
|
|
|
|
current_app.logger.info(f"{uri} {invalid_cert}")
|
|
|
|
raise requests.exceptions.SSLError from invalid_cert
|
|
|
|
except ValueError as ex:
|
|
|
|
# Convert to a more generic error we handle
|
|
|
|
raise requests.exceptions.RequestException(f"InvalidCodepoint: {str(ex)}") from None
|
2023-12-22 16:18:44 +13:00
|
|
|
except requests.exceptions.ReadTimeout as read_timeout:
|
|
|
|
current_app.logger.info(f"{uri} {read_timeout}")
|
|
|
|
raise requests.exceptions.ReadTimeout from read_timeout
|
2023-08-29 22:01:06 +12:00
|
|
|
|
|
|
|
return response
|
2023-09-03 16:30:20 +12:00
|
|
|
|
|
|
|
|
2023-12-29 17:32:35 +13:00
|
|
|
# do a HEAD request to a uri, return the result
|
|
|
|
def head_request(uri, params=None, headers=None) -> requests.Response:
|
|
|
|
if headers is None:
|
|
|
|
headers = {'User-Agent': 'PieFed/1.0'}
|
|
|
|
else:
|
|
|
|
headers.update({'User-Agent': 'PieFed/1.0'})
|
|
|
|
try:
|
|
|
|
response = requests.head(uri, params=params, headers=headers, timeout=5, allow_redirects=True)
|
|
|
|
except requests.exceptions.SSLError as invalid_cert:
|
|
|
|
# Not our problem if the other end doesn't have proper SSL
|
|
|
|
current_app.logger.info(f"{uri} {invalid_cert}")
|
|
|
|
raise requests.exceptions.SSLError from invalid_cert
|
|
|
|
except ValueError as ex:
|
|
|
|
# Convert to a more generic error we handle
|
|
|
|
raise requests.exceptions.RequestException(f"InvalidCodepoint: {str(ex)}") from None
|
|
|
|
except requests.exceptions.ReadTimeout as read_timeout:
|
|
|
|
current_app.logger.info(f"{uri} {read_timeout}")
|
|
|
|
raise requests.exceptions.ReadTimeout from read_timeout
|
|
|
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
2023-09-16 19:09:04 +12:00
|
|
|
# saves an arbitrary object into a persistent key-value store. cached.
|
2023-09-17 21:19:51 +12:00
|
|
|
@cache.memoize(timeout=50)
|
2023-09-03 16:30:20 +12:00
|
|
|
def get_setting(name: str, default=None):
|
|
|
|
setting = Settings.query.filter_by(name=name).first()
|
|
|
|
if setting is None:
|
|
|
|
return default
|
|
|
|
else:
|
|
|
|
return json.loads(setting.value)
|
|
|
|
|
|
|
|
|
2023-09-09 20:46:40 +12:00
|
|
|
# retrieves arbitrary object from persistent key-value store
|
2023-09-03 16:30:20 +12:00
|
|
|
def set_setting(name: str, value):
|
|
|
|
setting = Settings.query.filter_by(name=name).first()
|
|
|
|
if setting is None:
|
2023-09-17 21:19:51 +12:00
|
|
|
db.session.add(Settings(name=name, value=json.dumps(value)))
|
2023-09-03 16:30:20 +12:00
|
|
|
else:
|
|
|
|
setting.value = json.dumps(value)
|
|
|
|
db.session.commit()
|
2023-09-16 19:09:04 +12:00
|
|
|
cache.delete_memoized(get_setting)
|
2023-09-05 20:25:10 +12:00
|
|
|
|
|
|
|
|
|
|
|
# Return the contents of a file as a string. Inspired by PHP's function of the same name.
|
|
|
|
def file_get_contents(filename):
|
|
|
|
with open(filename, 'r') as file:
|
|
|
|
contents = file.read()
|
|
|
|
return contents
|
2023-09-09 20:46:40 +12:00
|
|
|
|
|
|
|
|
|
|
|
random_chars = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
|
|
|
|
|
|
|
|
|
|
|
def gibberish(length: int = 10) -> str:
|
|
|
|
return "".join([random.choice(random_chars) for x in range(length)])
|
2023-09-16 19:09:04 +12:00
|
|
|
|
|
|
|
|
|
|
|
def is_image_url(url):
|
|
|
|
parsed_url = urlparse(url)
|
|
|
|
path = parsed_url.path.lower()
|
|
|
|
common_image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']
|
|
|
|
return any(path.endswith(extension) for extension in common_image_extensions)
|
|
|
|
|
|
|
|
|
|
|
|
# sanitise HTML using an allow list
|
|
|
|
def allowlist_html(html: str) -> str:
|
2024-01-08 22:43:38 +13:00
|
|
|
if html is None or html == '':
|
|
|
|
return ''
|
2024-03-07 21:00:11 +00:00
|
|
|
allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre',
|
2024-02-27 04:35:03 +13:00
|
|
|
'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead']
|
2023-09-16 19:09:04 +12:00
|
|
|
# Parse the HTML using BeautifulSoup
|
|
|
|
soup = BeautifulSoup(html, 'html.parser')
|
|
|
|
|
2024-01-10 19:54:54 +13:00
|
|
|
# Find all plain text links, convert to <a> tags
|
2024-01-11 08:21:33 +13:00
|
|
|
re_url = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
|
|
|
|
for tag in soup.find_all(text=True):
|
|
|
|
tags = []
|
|
|
|
url = False
|
|
|
|
for t in re_url.split(tag.string):
|
|
|
|
if re_url.match(t):
|
|
|
|
a = soup.new_tag("a", href=t)
|
|
|
|
a.string = t
|
|
|
|
tags.append(a)
|
|
|
|
url = True
|
|
|
|
else:
|
|
|
|
tags.append(t)
|
|
|
|
if url:
|
|
|
|
for t in tags:
|
|
|
|
tag.insert_before(t)
|
|
|
|
tag.extract()
|
|
|
|
|
|
|
|
# Filter tags, leaving only safe ones
|
2023-09-16 19:09:04 +12:00
|
|
|
for tag in soup.find_all():
|
|
|
|
# If the tag is not in the allowed_tags list, remove it and its contents
|
|
|
|
if tag.name not in allowed_tags:
|
|
|
|
tag.extract()
|
|
|
|
else:
|
|
|
|
# Filter and sanitize attributes
|
|
|
|
for attr in list(tag.attrs):
|
2024-01-10 19:54:54 +13:00
|
|
|
if attr not in ['href', 'src', 'alt']:
|
2023-09-16 19:09:04 +12:00
|
|
|
del tag[attr]
|
2024-01-10 19:54:54 +13:00
|
|
|
# Add nofollow and target=_blank to anchors
|
|
|
|
if tag.name == 'a':
|
|
|
|
tag.attrs['rel'] = 'nofollow ugc'
|
|
|
|
tag.attrs['target'] = '_blank'
|
2024-02-17 23:11:39 +13:00
|
|
|
# Add loading=lazy to images
|
|
|
|
if tag.name == 'img':
|
|
|
|
tag.attrs['loading'] = 'lazy'
|
2024-02-27 04:35:03 +13:00
|
|
|
if tag.name == 'table':
|
|
|
|
tag.attrs['class'] = 'table'
|
2023-09-16 19:09:04 +12:00
|
|
|
|
2023-10-10 22:25:37 +13:00
|
|
|
return str(soup)
|
2023-09-16 19:09:04 +12:00
|
|
|
|
|
|
|
|
|
|
|
# convert basic HTML to Markdown
|
|
|
|
def html_to_markdown(html: str) -> str:
|
|
|
|
soup = BeautifulSoup(html, 'html.parser')
|
|
|
|
return html_to_markdown_worker(soup)
|
|
|
|
|
|
|
|
|
|
|
|
def html_to_markdown_worker(element, indent_level=0):
|
|
|
|
formatted_text = ''
|
|
|
|
for item in element.contents:
|
|
|
|
if isinstance(item, str):
|
|
|
|
formatted_text += item
|
|
|
|
elif item.name == 'p':
|
|
|
|
formatted_text += '\n\n'
|
|
|
|
elif item.name == 'br':
|
|
|
|
formatted_text += ' \n' # Double space at the end for line break
|
|
|
|
elif item.name == 'strong':
|
|
|
|
formatted_text += '**' + html_to_markdown_worker(item) + '**'
|
|
|
|
elif item.name == 'ul':
|
|
|
|
formatted_text += '\n'
|
|
|
|
formatted_text += html_to_markdown_worker(item, indent_level + 1)
|
|
|
|
formatted_text += '\n'
|
|
|
|
elif item.name == 'ol':
|
|
|
|
formatted_text += '\n'
|
|
|
|
formatted_text += html_to_markdown_worker(item, indent_level + 1)
|
|
|
|
formatted_text += '\n'
|
|
|
|
elif item.name == 'li':
|
|
|
|
bullet = '-' if item.find_parent(['ul', 'ol']) and item.find_previous_sibling() is None else ''
|
|
|
|
formatted_text += ' ' * indent_level + bullet + ' ' + html_to_markdown_worker(item).strip() + '\n'
|
|
|
|
elif item.name == 'blockquote':
|
|
|
|
formatted_text += ' ' * indent_level + '> ' + html_to_markdown_worker(item).strip() + '\n'
|
|
|
|
elif item.name == 'code':
|
|
|
|
formatted_text += '`' + html_to_markdown_worker(item) + '`'
|
|
|
|
return formatted_text
|
|
|
|
|
|
|
|
|
2023-10-10 22:25:37 +13:00
|
|
|
def markdown_to_html(markdown_text) -> str:
|
2023-10-21 15:49:01 +13:00
|
|
|
if markdown_text:
|
2024-02-27 04:35:03 +13:00
|
|
|
return allowlist_html(markdown2.markdown(markdown_text, safe_mode=True, extras={'middle-word-em': False, 'tables': True}))
|
2023-10-21 15:49:01 +13:00
|
|
|
else:
|
|
|
|
return ''
|
2023-10-10 22:25:37 +13:00
|
|
|
|
|
|
|
|
2023-10-23 20:18:46 +13:00
|
|
|
def markdown_to_text(markdown_text) -> str:
|
2023-12-28 20:39:26 +13:00
|
|
|
if not markdown_text or markdown_text == '':
|
|
|
|
return ''
|
2023-10-23 20:18:46 +13:00
|
|
|
return markdown_text.replace("# ", '')
|
|
|
|
|
|
|
|
|
2023-11-22 20:48:27 +13:00
|
|
|
def domain_from_url(url: str, create=True) -> Domain:
|
2023-11-29 20:32:07 +13:00
|
|
|
parsed_url = urlparse(url.lower().replace('www.', ''))
|
2024-02-23 16:52:17 +13:00
|
|
|
if parsed_url and parsed_url.hostname:
|
2024-03-08 22:01:46 +13:00
|
|
|
find_this = parsed_url.hostname.lower()
|
|
|
|
if find_this == 'youtu.be':
|
|
|
|
find_this = 'youtube.com'
|
|
|
|
domain = Domain.query.filter_by(name=find_this).first()
|
2024-02-23 16:52:17 +13:00
|
|
|
if create and domain is None:
|
2024-03-08 22:01:46 +13:00
|
|
|
domain = Domain(name=find_this)
|
2024-02-23 16:52:17 +13:00
|
|
|
db.session.add(domain)
|
|
|
|
db.session.commit()
|
|
|
|
return domain
|
|
|
|
else:
|
|
|
|
return None
|
2023-09-16 19:09:04 +12:00
|
|
|
|
2023-10-02 22:16:44 +13:00
|
|
|
|
|
|
|
def shorten_string(input_str, max_length=50):
|
|
|
|
if len(input_str) <= max_length:
|
|
|
|
return input_str
|
|
|
|
else:
|
|
|
|
return input_str[:max_length - 3] + '…'
|
|
|
|
|
|
|
|
|
|
|
|
def shorten_url(input: str, max_length=20):
|
|
|
|
return shorten_string(input.replace('https://', '').replace('http://', ''))
|
2023-10-10 22:25:37 +13:00
|
|
|
|
|
|
|
|
|
|
|
# the number of digits in a number. e.g. 1000 would be 4
|
|
|
|
def digits(input: int) -> int:
|
2024-01-11 21:18:34 +13:00
|
|
|
return len(shorten_number(input))
|
2023-10-21 15:49:01 +13:00
|
|
|
|
|
|
|
|
|
|
|
@cache.memoize(timeout=50)
|
|
|
|
def user_access(permission: str, user_id: int) -> bool:
|
|
|
|
has_access = db.session.execute(text('SELECT * FROM "role_permission" as rp ' +
|
|
|
|
'INNER JOIN user_role ur on rp.role_id = ur.role_id ' +
|
|
|
|
'WHERE ur.user_id = :user_id AND rp.permission = :permission'),
|
|
|
|
{'user_id': user_id, 'permission': permission}).first()
|
2023-10-21 16:20:13 +13:00
|
|
|
return has_access is not None
|
|
|
|
|
|
|
|
|
2024-01-04 17:07:02 +13:00
|
|
|
@cache.memoize(timeout=10)
|
2023-12-03 22:41:15 +13:00
|
|
|
def community_membership(user: User, community: Community) -> int:
|
|
|
|
# @cache.memoize works with User.subscribed but cache.delete_memoized does not, making it bad to use on class methods.
|
|
|
|
# however cache.memoize and cache.delete_memoized works fine with normal functions
|
|
|
|
if community is None:
|
|
|
|
return False
|
|
|
|
return user.subscribed(community.id)
|
|
|
|
|
|
|
|
|
2023-12-17 00:12:49 +13:00
|
|
|
@cache.memoize(timeout=86400)
|
|
|
|
def blocked_domains(user_id) -> List[int]:
|
|
|
|
blocks = DomainBlock.query.filter_by(user_id=user_id)
|
|
|
|
return [block.domain_id for block in blocks]
|
|
|
|
|
|
|
|
|
2023-10-21 16:20:13 +13:00
|
|
|
def retrieve_block_list():
|
|
|
|
try:
|
2024-01-03 22:52:19 +13:00
|
|
|
response = requests.get('https://raw.githubusercontent.com/rimu/no-qanon/master/domains.txt', timeout=1)
|
2023-10-21 16:20:13 +13:00
|
|
|
except:
|
|
|
|
return None
|
|
|
|
if response and response.status_code == 200:
|
2023-10-23 13:03:35 +13:00
|
|
|
return response.text
|
|
|
|
|
|
|
|
|
2024-03-05 09:07:26 +13:00
|
|
|
def retrieve_peertube_block_list():
|
|
|
|
try:
|
|
|
|
response = requests.get('https://peertube_isolation.frama.io/list/peertube_isolation.json', timeout=1)
|
|
|
|
except:
|
|
|
|
return None
|
|
|
|
list = ''
|
|
|
|
if response and response.status_code == 200:
|
|
|
|
response_data = response.json()
|
|
|
|
for row in response_data['data']:
|
|
|
|
list += row['value'] + "\n"
|
|
|
|
return list.strip()
|
|
|
|
|
|
|
|
|
2023-12-24 16:20:18 +13:00
|
|
|
def ensure_directory_exists(directory):
|
|
|
|
parts = directory.split('/')
|
|
|
|
rebuild_directory = ''
|
|
|
|
for part in parts:
|
|
|
|
rebuild_directory += part
|
|
|
|
if not os.path.isdir(rebuild_directory):
|
|
|
|
os.mkdir(rebuild_directory)
|
|
|
|
rebuild_directory += '/'
|
|
|
|
|
|
|
|
|
2024-02-28 12:55:30 +13:00
|
|
|
def mimetype_from_url(url):
|
|
|
|
parsed_url = urlparse(url)
|
|
|
|
path = parsed_url.path.split('?')[0] # Strip off anything after '?'
|
|
|
|
mime_type, _ = mimetypes.guess_type(path)
|
|
|
|
return mime_type
|
|
|
|
|
|
|
|
|
2023-10-23 13:03:35 +13:00
|
|
|
def validation_required(func):
|
|
|
|
@wraps(func)
|
|
|
|
def decorated_view(*args, **kwargs):
|
|
|
|
if current_user.verified:
|
|
|
|
return func(*args, **kwargs)
|
|
|
|
else:
|
|
|
|
return redirect(url_for('auth.validation_required'))
|
2023-11-03 21:59:48 +13:00
|
|
|
return decorated_view
|
|
|
|
|
|
|
|
|
|
|
|
def permission_required(permission):
|
|
|
|
def decorator(func):
|
|
|
|
@wraps(func)
|
|
|
|
def decorated_view(*args, **kwargs):
|
|
|
|
if user_access(permission, current_user.id):
|
|
|
|
return func(*args, **kwargs)
|
|
|
|
else:
|
|
|
|
# Handle the case where the user doesn't have the required permission
|
|
|
|
return redirect(url_for('auth.permission_denied'))
|
|
|
|
|
|
|
|
return decorated_view
|
|
|
|
|
|
|
|
return decorator
|
2023-11-30 20:57:51 +13:00
|
|
|
|
|
|
|
|
|
|
|
# sends the user back to where they came from
|
|
|
|
def back(default_url):
|
|
|
|
# Get the referrer from the request headers
|
|
|
|
referrer = request.referrer
|
|
|
|
|
|
|
|
# If the referrer exists and is not the same as the current request URL, redirect to the referrer
|
|
|
|
if referrer and referrer != request.url:
|
|
|
|
return redirect(referrer)
|
|
|
|
|
|
|
|
# If referrer is not available or is the same as the current request URL, redirect to the default URL
|
|
|
|
return redirect(default_url)
|
2023-12-08 17:13:38 +13:00
|
|
|
|
|
|
|
|
|
|
|
# format a datetime in a way that is used in ActivityPub
|
|
|
|
def ap_datetime(date_time: datetime) -> str:
|
|
|
|
return date_time.isoformat() + '+00:00'
|
2023-12-13 21:04:11 +13:00
|
|
|
|
|
|
|
|
|
|
|
class MultiCheckboxField(SelectMultipleField):
|
|
|
|
widget = ListWidget(prefix_label=False)
|
2023-12-30 19:03:44 +13:00
|
|
|
option_widget = CheckboxInput()
|
|
|
|
|
|
|
|
|
|
|
|
def ip_address() -> str:
|
|
|
|
ip = request.headers.get('X-Forwarded-For') or request.remote_addr
|
|
|
|
if ',' in ip: # Remove all but first ip addresses
|
|
|
|
ip = ip[:ip.index(',')].strip()
|
|
|
|
return ip
|
|
|
|
|
|
|
|
|
|
|
|
def user_ip_banned() -> bool:
|
|
|
|
current_ip_address = ip_address()
|
|
|
|
if current_ip_address:
|
|
|
|
return current_ip_address in banned_ip_addresses()
|
|
|
|
|
|
|
|
|
2024-01-03 16:29:58 +13:00
|
|
|
@cache.memoize(timeout=30)
|
|
|
|
def instance_banned(domain: str) -> bool: # see also activitypub.util.instance_blocked()
|
2024-01-02 16:07:41 +13:00
|
|
|
banned = BannedInstances.query.filter_by(domain=domain).first()
|
|
|
|
return banned is not None
|
|
|
|
|
|
|
|
|
2023-12-30 19:03:44 +13:00
|
|
|
def user_cookie_banned() -> bool:
|
|
|
|
cookie = request.cookies.get('sesion', None)
|
|
|
|
return cookie is not None
|
|
|
|
|
|
|
|
|
2024-02-19 15:01:53 +13:00
|
|
|
@cache.memoize(timeout=30)
|
2023-12-30 19:03:44 +13:00
|
|
|
def banned_ip_addresses() -> List[str]:
|
|
|
|
ips = IpBan.query.all()
|
|
|
|
return [ip.ip_address for ip in ips]
|
2024-01-02 19:41:00 +13:00
|
|
|
|
|
|
|
|
2024-01-03 16:29:58 +13:00
|
|
|
def can_downvote(user, community: Community, site=None) -> bool:
|
|
|
|
if user is None or community is None or user.banned:
|
2024-01-02 19:41:00 +13:00
|
|
|
return False
|
|
|
|
|
|
|
|
if site is None:
|
|
|
|
try:
|
|
|
|
site = g.site
|
|
|
|
except:
|
|
|
|
site = Site.query.get(1)
|
|
|
|
|
2024-01-03 16:29:58 +13:00
|
|
|
if not site.enable_downvotes and community.is_local():
|
2024-01-02 19:41:00 +13:00
|
|
|
return False
|
|
|
|
|
2024-01-03 16:29:58 +13:00
|
|
|
if community.local_only and not user.is_local():
|
|
|
|
return False
|
2024-01-02 19:41:00 +13:00
|
|
|
|
2024-01-08 18:50:37 +13:00
|
|
|
if user.attitude < -0.40 or user.reputation < -10: # this should exclude about 3.7% of users.
|
2024-01-02 19:41:00 +13:00
|
|
|
return False
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
2024-01-03 16:29:58 +13:00
|
|
|
def can_upvote(user, community: Community) -> bool:
|
|
|
|
if user is None or community is None or user.banned:
|
2024-01-02 19:41:00 +13:00
|
|
|
return False
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
2024-02-24 11:07:06 +13:00
|
|
|
def can_create_post(user, content: Community) -> bool:
|
2024-01-02 19:41:00 +13:00
|
|
|
if user is None or content is None or user.banned:
|
|
|
|
return False
|
|
|
|
|
2024-02-24 11:07:06 +13:00
|
|
|
if content.is_moderator(user) or user.is_admin():
|
|
|
|
return True
|
2024-01-02 19:41:00 +13:00
|
|
|
|
2024-02-24 11:07:06 +13:00
|
|
|
if content.restricted_to_mods:
|
|
|
|
return False
|
2024-01-02 19:41:00 +13:00
|
|
|
|
2024-02-24 11:07:06 +13:00
|
|
|
if content.local_only and not user.is_local():
|
|
|
|
return False
|
2024-01-02 19:41:00 +13:00
|
|
|
|
2024-02-24 11:07:06 +13:00
|
|
|
return True
|
2024-01-02 19:41:00 +13:00
|
|
|
|
|
|
|
|
2024-02-24 11:07:06 +13:00
|
|
|
def can_create_post_reply(user, content: Community) -> bool:
|
|
|
|
if user is None or content is None or user.banned:
|
|
|
|
return False
|
|
|
|
|
|
|
|
if content.is_moderator(user) or user.is_admin():
|
|
|
|
return True
|
|
|
|
|
|
|
|
if content.local_only and not user.is_local():
|
|
|
|
return False
|
2024-01-02 19:41:00 +13:00
|
|
|
|
|
|
|
return True
|
2024-01-03 16:29:58 +13:00
|
|
|
|
|
|
|
|
2024-01-06 14:54:10 +13:00
|
|
|
def reply_already_exists(user_id, post_id, parent_id, body) -> bool:
|
|
|
|
if parent_id is None:
|
|
|
|
num_matching_replies = db.session.execute(text(
|
|
|
|
'SELECT COUNT(id) as c FROM "post_reply" WHERE user_id = :user_id AND post_id = :post_id AND parent_id is null AND body = :body'),
|
|
|
|
{'user_id': user_id, 'post_id': post_id, 'body': body}).scalar()
|
|
|
|
else:
|
|
|
|
num_matching_replies = db.session.execute(text(
|
|
|
|
'SELECT COUNT(id) as c FROM "post_reply" WHERE user_id = :user_id AND post_id = :post_id AND parent_id = :parent_id AND body = :body'),
|
|
|
|
{'user_id': user_id, 'post_id': post_id, 'parent_id': parent_id, 'body': body}).scalar()
|
|
|
|
return num_matching_replies != 0
|
|
|
|
|
|
|
|
|
|
|
|
def reply_is_just_link_to_gif_reaction(body) -> bool:
|
|
|
|
tmp_body = body.strip()
|
|
|
|
if tmp_body.startswith('https://media.tenor.com/') or \
|
2024-02-05 08:03:56 +13:00
|
|
|
tmp_body.startswith('https://media1.tenor.com/') or \
|
|
|
|
tmp_body.startswith('https://media2.tenor.com/') or \
|
|
|
|
tmp_body.startswith('https://media3.tenor.com/') or \
|
2024-01-07 21:36:04 +13:00
|
|
|
tmp_body.startswith('https://i.giphy.com/') or \
|
2024-01-10 19:06:35 +13:00
|
|
|
tmp_body.startswith('https://i.imgflip.com') or \
|
2024-01-06 14:54:10 +13:00
|
|
|
tmp_body.startswith('https://media1.giphy.com/') or \
|
|
|
|
tmp_body.startswith('https://media2.giphy.com/') or \
|
|
|
|
tmp_body.startswith('https://media3.giphy.com/') or \
|
|
|
|
tmp_body.startswith('https://media4.giphy.com/'):
|
|
|
|
return True
|
|
|
|
else:
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
2024-01-03 16:29:58 +13:00
|
|
|
def inbox_domain(inbox: str) -> str:
|
|
|
|
inbox = inbox.lower()
|
|
|
|
if 'https://' in inbox or 'http://' in inbox:
|
|
|
|
inbox = urlparse(inbox).hostname
|
|
|
|
return inbox
|
|
|
|
|
|
|
|
|
|
|
|
def awaken_dormant_instance(instance):
|
|
|
|
if instance and not instance.gone_forever:
|
|
|
|
if instance.dormant:
|
|
|
|
if instance.start_trying_again < utcnow():
|
|
|
|
instance.dormant = False
|
|
|
|
db.session.commit()
|
|
|
|
# give up after ~5 days of trying
|
|
|
|
if instance.start_trying_again and utcnow() + timedelta(days=5) < instance.start_trying_again:
|
|
|
|
instance.gone_forever = True
|
|
|
|
instance.dormant = True
|
|
|
|
db.session.commit()
|
2024-01-03 20:14:39 +13:00
|
|
|
|
|
|
|
|
2024-01-10 09:44:59 +13:00
|
|
|
def shorten_number(number):
|
|
|
|
if number < 1000:
|
|
|
|
return str(number)
|
|
|
|
elif number < 1000000:
|
|
|
|
return f'{number / 1000:.1f}k'
|
|
|
|
else:
|
|
|
|
return f'{number / 1000000:.1f}M'
|
|
|
|
|
|
|
|
|
2024-01-11 20:39:22 +13:00
|
|
|
@cache.memoize(timeout=300)
|
|
|
|
def user_filters_home(user_id):
|
|
|
|
filters = Filter.query.filter_by(user_id=user_id, filter_home=True).filter(or_(Filter.expire_after > date.today(), Filter.expire_after == None))
|
|
|
|
result = defaultdict(set)
|
|
|
|
for filter in filters:
|
|
|
|
keywords = [keyword.strip().lower() for keyword in filter.keywords.splitlines()]
|
|
|
|
if filter.hide_type == 0:
|
|
|
|
result[filter.title].update(keywords)
|
|
|
|
else: # type == 1 means hide completely. These posts are excluded from output by the jinja template
|
|
|
|
result['-1'].update(keywords)
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
@cache.memoize(timeout=300)
|
|
|
|
def user_filters_posts(user_id):
|
|
|
|
filters = Filter.query.filter_by(user_id=user_id, filter_posts=True).filter(or_(Filter.expire_after > date.today(), Filter.expire_after == None))
|
|
|
|
result = defaultdict(set)
|
|
|
|
for filter in filters:
|
|
|
|
keywords = [keyword.strip().lower() for keyword in filter.keywords.splitlines()]
|
|
|
|
if filter.hide_type == 0:
|
|
|
|
result[filter.title].update(keywords)
|
|
|
|
else:
|
|
|
|
result['-1'].update(keywords)
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
@cache.memoize(timeout=300)
|
|
|
|
def user_filters_replies(user_id):
|
|
|
|
filters = Filter.query.filter_by(user_id=user_id, filter_replies=True).filter(or_(Filter.expire_after > date.today(), Filter.expire_after == None))
|
|
|
|
result = defaultdict(set)
|
|
|
|
for filter in filters:
|
|
|
|
keywords = [keyword.strip().lower() for keyword in filter.keywords.splitlines()]
|
|
|
|
if filter.hide_type == 0:
|
|
|
|
result[filter.title].update(keywords)
|
|
|
|
else:
|
|
|
|
result['-1'].update(keywords)
|
|
|
|
return result
|
|
|
|
|
2024-01-10 09:44:59 +13:00
|
|
|
|
2024-01-12 12:34:08 +13:00
|
|
|
@cache.memoize(timeout=300)
|
|
|
|
def moderating_communities(user_id):
|
2024-01-12 13:49:40 +13:00
|
|
|
if user_id is None or user_id == 0:
|
2024-01-12 12:34:08 +13:00
|
|
|
return []
|
|
|
|
return Community.query.join(CommunityMember, Community.id == CommunityMember.community_id).\
|
|
|
|
filter(Community.banned == False).\
|
|
|
|
filter(or_(CommunityMember.is_moderator == True, CommunityMember.is_owner == True)).\
|
|
|
|
filter(CommunityMember.user_id == user_id).order_by(Community.title).all()
|
|
|
|
|
|
|
|
|
|
|
|
@cache.memoize(timeout=300)
|
|
|
|
def joined_communities(user_id):
|
2024-01-12 13:49:40 +13:00
|
|
|
if user_id is None or user_id == 0:
|
2024-01-12 12:34:08 +13:00
|
|
|
return []
|
|
|
|
return Community.query.join(CommunityMember, Community.id == CommunityMember.community_id).\
|
|
|
|
filter(Community.banned == False). \
|
|
|
|
filter(CommunityMember.is_moderator == False, CommunityMember.is_owner == False). \
|
|
|
|
filter(CommunityMember.user_id == user_id).order_by(Community.title).all()
|
|
|
|
|
|
|
|
|
2024-02-02 15:30:03 +13:00
|
|
|
def finalize_user_setup(user, application_required=False):
|
|
|
|
from app.activitypub.signature import RsaKeys
|
|
|
|
user.verified = True
|
|
|
|
user.last_seen = utcnow()
|
|
|
|
private_key, public_key = RsaKeys.generate_keypair()
|
|
|
|
user.private_key = private_key
|
|
|
|
user.public_key = public_key
|
|
|
|
user.ap_profile_id = f"https://{current_app.config['SERVER_NAME']}/u/{user.user_name}"
|
|
|
|
user.ap_public_url = f"https://{current_app.config['SERVER_NAME']}/u/{user.user_name}"
|
|
|
|
user.ap_inbox_url = f"https://{current_app.config['SERVER_NAME']}/u/{user.user_name}/inbox"
|
|
|
|
db.session.commit()
|
|
|
|
send_welcome_email(user, application_required)
|
|
|
|
|
2024-01-12 12:34:08 +13:00
|
|
|
|
2024-01-07 21:36:04 +13:00
|
|
|
# All the following post/comment ranking math is explained at https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9
|
2024-01-03 20:14:39 +13:00
|
|
|
epoch = datetime(1970, 1, 1)
|
|
|
|
|
|
|
|
def epoch_seconds(date):
|
|
|
|
td = date - epoch
|
|
|
|
return td.days * 86400 + td.seconds + (float(td.microseconds) / 1000000)
|
|
|
|
|
2024-01-05 08:11:35 +13:00
|
|
|
|
2024-01-03 20:14:39 +13:00
|
|
|
def post_ranking(score, date: datetime):
|
2024-01-05 08:11:35 +13:00
|
|
|
if date is None:
|
|
|
|
date = datetime.utcnow()
|
2024-01-06 08:16:50 +13:00
|
|
|
if score is None:
|
|
|
|
score = 1
|
2024-01-03 20:14:39 +13:00
|
|
|
order = math.log(max(abs(score), 1), 10)
|
|
|
|
sign = 1 if score > 0 else -1 if score < 0 else 0
|
|
|
|
seconds = epoch_seconds(date) - 1685766018
|
|
|
|
return round(sign * order + seconds / 45000, 7)
|
2024-01-07 21:36:04 +13:00
|
|
|
|
|
|
|
|
|
|
|
# used for ranking comments
|
|
|
|
def _confidence(ups, downs):
|
|
|
|
n = ups + downs
|
|
|
|
|
|
|
|
if n == 0:
|
|
|
|
return 0.0
|
|
|
|
|
|
|
|
z = 1.281551565545
|
|
|
|
p = float(ups) / n
|
|
|
|
|
|
|
|
left = p + 1 / (2 * n) * z * z
|
|
|
|
right = z * math.sqrt(p * (1 - p) / n + z * z / (4 * n * n))
|
|
|
|
under = 1 + 1 / n * z * z
|
|
|
|
|
|
|
|
return (left - right) / under
|
|
|
|
|
|
|
|
|
|
|
|
def confidence(ups, downs) -> float:
|
2024-01-25 20:16:08 +13:00
|
|
|
if ups is None or ups < 0:
|
2024-01-19 07:45:48 +13:00
|
|
|
ups = 0
|
2024-01-25 20:16:08 +13:00
|
|
|
if downs is None or downs < 0:
|
2024-01-19 07:45:48 +13:00
|
|
|
downs = 0
|
2024-01-07 21:36:04 +13:00
|
|
|
if ups + downs == 0:
|
|
|
|
return 0.0
|
|
|
|
else:
|
|
|
|
return _confidence(ups, downs)
|
2024-02-04 22:02:32 +13:00
|
|
|
|
|
|
|
|
|
|
|
# By no means is this a complete list, but it is very easy to search for the ones you need later.
|
|
|
|
KNOWN_OPENGRAPH_TAGS = [
|
|
|
|
"og:site_name",
|
|
|
|
"og:title",
|
|
|
|
"og:locale",
|
|
|
|
"og:type",
|
|
|
|
"og:image",
|
|
|
|
"og:url",
|
|
|
|
"og:image:url",
|
|
|
|
"og:image:secure_url",
|
|
|
|
"og:image:type",
|
|
|
|
"og:image:width",
|
|
|
|
"og:image:height",
|
|
|
|
"og:image:alt",
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
def parse_page(page_url, tags_to_search = KNOWN_OPENGRAPH_TAGS, fallback_tags = None):
|
|
|
|
'''
|
|
|
|
Parses a page, returns a JSON style dictionary of all OG tags found on that page.
|
|
|
|
|
|
|
|
Passing in tags_to_search is optional. By default it will search through KNOWN_OPENGRAPH_TAGS constant, but for the sake of efficiency, you may want to only search for 1 or 2 tags
|
|
|
|
|
|
|
|
Returns False if page is unreadable
|
|
|
|
'''
|
|
|
|
# read the html from the page
|
|
|
|
response = get_request(page_url)
|
|
|
|
|
2024-02-05 16:23:42 +13:00
|
|
|
if response.status_code != 200:
|
2024-02-04 22:02:32 +13:00
|
|
|
return False
|
|
|
|
|
|
|
|
# set up beautiful soup
|
|
|
|
soup = BeautifulSoup(response.content, 'html.parser')
|
|
|
|
|
|
|
|
# loop through the known list of opengraph tags, searching for each and appending a dictionary as we go.
|
|
|
|
found_tags = {}
|
|
|
|
|
|
|
|
for og_tag in tags_to_search:
|
|
|
|
new_found_tag = soup.find("meta", property=og_tag)
|
|
|
|
if new_found_tag is not None:
|
|
|
|
found_tags[new_found_tag["property"]] = new_found_tag["content"]
|
|
|
|
elif fallback_tags is not None and og_tag in fallback_tags:
|
|
|
|
found_tags[og_tag] = soup.find(fallback_tags[og_tag]).text
|
|
|
|
|
|
|
|
return found_tags
|
2024-02-07 17:31:12 +13:00
|
|
|
|
|
|
|
|
|
|
|
def current_theme():
|
2024-02-07 18:33:25 +13:00
|
|
|
""" The theme the current user has set, falling back to the site default if none specified or user is not logged in """
|
2024-02-07 17:31:12 +13:00
|
|
|
if current_user.is_authenticated:
|
|
|
|
if current_user.theme is not None and current_user.theme != '':
|
|
|
|
return current_user.theme
|
|
|
|
else:
|
|
|
|
return g.site.default_theme if g.site.default_theme is not None else ''
|
|
|
|
else:
|
|
|
|
return g.site.default_theme if g.site.default_theme is not None else ''
|
2024-02-07 18:33:25 +13:00
|
|
|
|
|
|
|
|
|
|
|
def theme_list():
|
|
|
|
""" All the themes available, by looking in the templates/themes directory """
|
|
|
|
result = [('', 'PieFed')]
|
|
|
|
for root, dirs, files in os.walk('app/templates/themes'):
|
|
|
|
for dir in dirs:
|
|
|
|
if os.path.exists(f'app/templates/themes/{dir}/{dir}.json'):
|
|
|
|
theme_settings = json.loads(file_get_contents(f'app/templates/themes/{dir}/{dir}.json'))
|
|
|
|
result.append((dir, theme_settings['name']))
|
|
|
|
return result
|
2024-02-25 16:24:50 +13:00
|
|
|
|
|
|
|
|
|
|
|
def sha256_digest(input_string):
|
|
|
|
"""
|
|
|
|
Compute the SHA-256 hash digest of a given string.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
- input_string: The string to compute the hash digest for.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
- A hexadecimal string representing the SHA-256 hash digest.
|
|
|
|
"""
|
|
|
|
sha256_hash = hashlib.sha256()
|
|
|
|
sha256_hash.update(input_string.encode('utf-8'))
|
|
|
|
return sha256_hash.hexdigest()
|
2024-03-08 01:33:58 +00:00
|
|
|
|
|
|
|
|
2024-03-08 22:01:46 +13:00
|
|
|
def remove_tracking_from_link(url):
|
2024-03-08 22:09:54 +13:00
|
|
|
parsed_url = urlparse(url)
|
|
|
|
|
|
|
|
if parsed_url.netloc == 'youtu.be':
|
|
|
|
# Extract video ID
|
|
|
|
video_id = parsed_url.path[1:] # Remove leading slash
|
|
|
|
|
|
|
|
# Preserve 't' parameter if it exists
|
|
|
|
query_params = parse_qs(parsed_url.query)
|
|
|
|
if 't' in query_params:
|
|
|
|
new_query_params = {'t': query_params['t']}
|
|
|
|
new_query_string = urlencode(new_query_params, doseq=True)
|
|
|
|
else:
|
|
|
|
new_query_string = ''
|
|
|
|
|
|
|
|
cleaned_url = f"https://youtu.be/{video_id}"
|
|
|
|
if new_query_string:
|
|
|
|
cleaned_url += f"?{new_query_string}"
|
2024-03-08 01:33:58 +00:00
|
|
|
|
2024-03-08 22:09:54 +13:00
|
|
|
return cleaned_url
|
2024-03-08 01:33:58 +00:00
|
|
|
else:
|
|
|
|
return url
|
|
|
|
|