diff --git a/Dockerfile b/Dockerfile index 08a9130c..b10e111b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM --platform=$BUILDPLATFORM python:3-alpine AS builder RUN apk update RUN apk add pkgconfig -RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng ffmpeg +RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng WORKDIR /app COPY . /app diff --git a/INSTALL.md b/INSTALL.md index d29e7830..31732fb3 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,7 +1,8 @@ # Contents -* [Setup Database](#setup-database) -* [Install Python Libraries](#install-python-libraries) +* [Choose your path - easy way or hard way](#choose-path) +* [Setup Database](#setup-database) +* [Install Python Libraries](#install-python-libraries) * [Install additional requirements](#install-additional-requirements) * [Setup pyfedi](#setup-pyfedi) * [Setup .env file](#setup-env-file) @@ -14,6 +15,110 @@ * [Notes for Windows (WSL2)](#notes-for-windows-wsl2) * [Notes for Pip Package Management](#notes-for-pip-package-management) +
+ +## Do you want this the easy way or the hard way? + +### Easy way: docker + +Docker can be used to create an isolated environment that is separate from the host server and starts from a consistent +configuration. While it is quicker and easier, it's not to everyone's taste. + +* Clone PieFed into a new directory + +```bash +git clone https://codeberg.org/rimu/pyfedi.git +``` + +* Copy suggested docker config + +```bash +cd pyfedi +cp env.docker.sample .env.docker +``` + +* Edit docker environment file + +Open .env.docker in your text editor, set SECRET_KEY to something random and set SERVER_NAME to your domain name, +WITHOUT the https:// at the front. The database login details don't really need to be changed because postgres will be +locked away inside its own docker network that only PieFed can access but if you want to change POSTGRES_PASSWORD go ahead, +just be sure to update DATABASE_URL accordingly. + +Check out compose.yaml and see if it is to your liking. Note the port (8030) and volume definitions - they might need to be +tweaked. + +* First startup + +This will take a few minutes. + +```bash +export DOCKER_BUILDKIT=1 +docker-compose up --build +``` + +After a while the gibberish will stop scrolling past. If you see errors let us know at [https://piefed.social/c/piefed_help](https://piefed.social/c/piefed_help). + +* Networking + +You need to somehow allow client connections from outside to access port 8030 on your server. The details of this are outside the scope +of this article. You could use an nginx reverse proxy, a cloudflare zero trust tunnel, tailscale, whatever. Just make sure it has SSL on +it as PieFed assumes you're making requests that start with https://your-domain. + +Once you have the networking set up, go to https://your-domain in your browser and see if the docker output in your terminal +shows signs of reacting to the request. There will be an error showing up in the console because we haven't done the next step yet. 
+ +* Database initialization + +This must be done once and once only. Doing this will wipe all existing data in your instance so do not do it unless you have a +brand new instance. + +Open a shell inside the PieFed docker container: + +`docker exec -it piefed_app1 sh` + +Inside the container, run the initialization command: + +``` +export FLASK_APP=pyfedi.py +flask init-db +``` + +Among other things this process will get you set up with a username and password. Don't use 'admin' as the username - script kiddies love that one. + +* The moment of truth + +Go to https://your-domain in your web browser and PieFed should appear. Log in with the username and password from the previous step. + +At this point docker is pretty much OK so you don't need to see the terminal output as readily. Hit Ctrl + C to close down docker and then run + +```bash +docker-compose up -d +``` + +to have PieFed run in the background. + +* But wait there's more + +Until you set the right environment variables, PieFed won't be able to send email. Check out env.sample for some hints. +When you have a new value to set, add it to .env.docker and then restart docker with: + +``` +docker-compose down && docker-compose up -d +``` + +There are also regular cron jobs that need to be run. Set up cron on the host to run those scripts inside the container - see the Cron +section of this document for details. + +You probably want a Captcha on the registration form - more environment variables. + +CDN, CloudFlare. More environment variables. + +All this is explained in the bare metal guide, below. 
+ +### Hard way: bare metal + +Read on + ## Setup Database @@ -77,7 +182,7 @@ sudo apt install tesseract-ocr ## Setup PyFedi -* Clone PyFedi +* Clone PieFed ```bash git clone https://codeberg.org/rimu/pyfedi.git diff --git a/app/activitypub/util.py b/app/activitypub/util.py index c4aa0014..2fefb9ef 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -10,7 +10,6 @@ import httpx import redis from flask import current_app, request, g, url_for, json from flask_babel import _ -from requests import JSONDecodeError from sqlalchemy import text, func, desc from sqlalchemy.exc import IntegrityError @@ -29,7 +28,7 @@ import pytesseract from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \ shorten_string, remove_tracking_from_link, \ - microblog_content_to_title, generate_image_from_video_url, is_video_url, \ + microblog_content_to_title, is_video_url, \ notification_subscribers, communities_banned_from, actor_contains_blocked_words, \ html_to_text, add_to_modlog_activitypub, joined_communities, \ moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse @@ -1009,148 +1008,106 @@ def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory, to session = get_task_session() file: File = session.query(File).get(file_id) if file and file.source_url: - # Videos (old code. 
not invoked because file.source_url won't end .mp4 or .webm) - if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'): - new_filename = gibberish(15) - - # set up the storage directory - directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] - ensure_directory_exists(directory) - - # file path and names to store the resized images on disk - final_place = os.path.join(directory, new_filename + '.jpg') - final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') - try: - generate_image_from_video_url(file.source_url, final_place) - except Exception as e: - return - - if final_place: - image = Image.open(final_place) - img_width = image.width - - # Resize the image to medium - if medium_width: - if img_width > medium_width: - image.thumbnail((medium_width, medium_width)) - image.save(final_place) - file.file_path = final_place - file.width = image.width - file.height = image.height - - # Resize the image to a thumbnail (webp) - if thumbnail_width: - if img_width > thumbnail_width: - image.thumbnail((thumbnail_width, thumbnail_width)) - image.save(final_place_thumbnail, format="WebP", quality=93) - file.thumbnail_path = final_place_thumbnail - file.thumbnail_width = image.width - file.thumbnail_height = image.height - - session.commit() - - # Images + try: + source_image_response = get_request(file.source_url) + except: + pass else: - try: - source_image_response = get_request(file.source_url) - except: - pass - else: - if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url: - source_image_response.close() - # Lemmy failed to retrieve the image but we might have better luck. 
Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63 - # The un-proxied image url is the query parameter called 'url' - parsed_url = urlparse(file.source_url) - query_params = parse_qs(parsed_url.query) - if 'url' in query_params: - url_value = query_params['url'][0] - source_image_response = get_request(url_value) - else: - source_image_response = None - if source_image_response and source_image_response.status_code == 200: - content_type = source_image_response.headers.get('content-type') - if content_type: - if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')): - source_image = source_image_response.content - source_image_response.close() + if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url: + source_image_response.close() + # Lemmy failed to retrieve the image but we might have better luck. 
Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63 + # The un-proxied image url is the query parameter called 'url' + parsed_url = urlparse(file.source_url) + query_params = parse_qs(parsed_url.query) + if 'url' in query_params: + url_value = query_params['url'][0] + source_image_response = get_request(url_value) + else: + source_image_response = None + if source_image_response and source_image_response.status_code == 200: + content_type = source_image_response.headers.get('content-type') + if content_type: + if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')): + source_image = source_image_response.content + source_image_response.close() - content_type_parts = content_type.split('/') - if content_type_parts: - # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8' + content_type_parts = content_type.split('/') + if content_type_parts: + # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8' - # Remove ;charset=whatever - main_part = content_type.split(';')[0] + # Remove ;charset=whatever + main_part = content_type.split(';')[0] - # Split the main part on the '/' character and take the second part - file_ext = '.' + main_part.split('/')[1] - file_ext = file_ext.strip() # just to be sure + # Split the main part on the '/' character and take the second part + file_ext = '.' 
+ main_part.split('/')[1] + file_ext = file_ext.strip() # just to be sure - if file_ext == '.jpeg': - file_ext = '.jpg' - elif file_ext == '.svg+xml': - return # no need to resize SVG images - elif file_ext == '.octet-stream': - file_ext = '.avif' - else: - file_ext = os.path.splitext(file.source_url)[1] - file_ext = file_ext.replace('%3f', '?') # sometimes urls are not decoded properly - if '?' in file_ext: - file_ext = file_ext.split('?')[0] + if file_ext == '.jpeg': + file_ext = '.jpg' + elif file_ext == '.svg+xml': + return # no need to resize SVG images + elif file_ext == '.octet-stream': + file_ext = '.avif' + else: + file_ext = os.path.splitext(file.source_url)[1] + file_ext = file_ext.replace('%3f', '?') # sometimes urls are not decoded properly + if '?' in file_ext: + file_ext = file_ext.split('?')[0] - new_filename = gibberish(15) + new_filename = gibberish(15) - # set up the storage directory - directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] - ensure_directory_exists(directory) + # set up the storage directory + directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] + ensure_directory_exists(directory) - # file path and names to store the resized images on disk - final_place = os.path.join(directory, new_filename + file_ext) - final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') + # file path and names to store the resized images on disk + final_place = os.path.join(directory, new_filename + file_ext) + final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') - if file_ext == '.avif': # this is quite a big plugin so we'll only load it if necessary - import pillow_avif + if file_ext == '.avif': # this is quite a big plugin so we'll only load it if necessary + import pillow_avif - # Load image data into Pillow - Image.MAX_IMAGE_PIXELS = 89478485 - image = Image.open(BytesIO(source_image)) - image = 
ImageOps.exif_transpose(image) - img_width = image.width - img_height = image.height + # Load image data into Pillow + Image.MAX_IMAGE_PIXELS = 89478485 + image = Image.open(BytesIO(source_image)) + image = ImageOps.exif_transpose(image) + img_width = image.width + img_height = image.height - # Resize the image to medium - if medium_width: - if img_width > medium_width: - image.thumbnail((medium_width, medium_width)) - image.save(final_place) - file.file_path = final_place - file.width = image.width - file.height = image.height + # Resize the image to medium + if medium_width: + if img_width > medium_width: + image.thumbnail((medium_width, medium_width)) + image.save(final_place) + file.file_path = final_place + file.width = image.width + file.height = image.height - # Resize the image to a thumbnail (webp) - if thumbnail_width: - if img_width > thumbnail_width: - image.thumbnail((thumbnail_width, thumbnail_width)) - image.save(final_place_thumbnail, format="WebP", quality=93) - file.thumbnail_path = final_place_thumbnail - file.thumbnail_width = image.width - file.thumbnail_height = image.height + # Resize the image to a thumbnail (webp) + if thumbnail_width: + if img_width > thumbnail_width: + image.thumbnail((thumbnail_width, thumbnail_width)) + image.save(final_place_thumbnail, format="WebP", quality=93) + file.thumbnail_path = final_place_thumbnail + file.thumbnail_width = image.width + file.thumbnail_height = image.height - session.commit() + session.commit() - # Alert regarding fascist meme content - if toxic_community and img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots. - try: - image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30) - except Exception as e: - image_text = '' - if 'Anonymous' in image_text and ('No.' 
in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' - post = Post.query.filter_by(image_id=file.id).first() - notification = Notification(title='Review this', - user_id=1, - author_id=post.user_id, - url=url_for('activitypub.post_ap', post_id=post.id)) - session.add(notification) - session.commit() + # Alert regarding fascist meme content + if toxic_community and img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots. + try: + image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30) + except Exception as e: + image_text = '' + if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' + post = Post.query.filter_by(image_id=file.id).first() + notification = Notification(title='Review this', + user_id=1, + author_id=post.user_id, + url=url_for('activitypub.post_ap', post_id=post.id)) + session.add(notification) + session.commit() def find_reply_parent(in_reply_to: str) -> Tuple[int, int, int]: diff --git a/app/models.py b/app/models.py index ceae4a4a..1c8c3f26 100644 --- a/app/models.py +++ b/app/models.py @@ -1364,7 +1364,7 @@ class Post(db.Model): i += 1 db.session.commit() - if post.image_id: + if post.image_id and not post.type == constants.POST_TYPE_VIDEO: make_image_sizes(post.image_id, 170, 512, 'posts', community.low_quality) # the 512 sized image is for masonry view diff --git a/app/templates/admin/_nav.html b/app/templates/admin/_nav.html index e04d0b84..dea6efd2 100644 --- a/app/templates/admin/_nav.html +++ b/app/templates/admin/_nav.html @@ -15,6 +15,7 @@ {{ _('Newsletter') }} | {{ _('Permissions') }} | {{ _('Activities') }} + {{ _('Modlog') }} {% if debug_mode %} | {{ _('Dev Tools') }} {% endif%} diff --git a/app/templates/base.html b/app/templates/base.html index 718d1d83..a7467df1 100644 --- a/app/templates/base.html +++ 
b/app/templates/base.html @@ -228,8 +228,9 @@