From d73c12d4c761b95f34c1b7c957aa6c6bda6b852a Mon Sep 17 00:00:00 2001
From: rimu <3310831+rimu@users.noreply.github.com>
Date: Mon, 16 Dec 2024 20:49:43 +1300
Subject: [PATCH] Remove moviepy and ffmpeg dependencies

---
 Dockerfile              |   2 +-
 app/activitypub/util.py | 217 ++++++++++++++++------------------------
 app/models.py           |   2 +-
 app/utils.py            |  47 ---------
 requirements.txt        |   1 -
 5 files changed, 89 insertions(+), 180 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 08a9130c..b10e111b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,7 +4,7 @@ FROM --platform=$BUILDPLATFORM python:3-alpine AS builder
 
 RUN apk update
 RUN apk add pkgconfig
-RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng ffmpeg
+RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng
 
 WORKDIR /app
 COPY . /app
diff --git a/app/activitypub/util.py b/app/activitypub/util.py
index c4aa0014..2fefb9ef 100644
--- a/app/activitypub/util.py
+++ b/app/activitypub/util.py
@@ -10,7 +10,6 @@ import httpx
 import redis
 from flask import current_app, request, g, url_for, json
 from flask_babel import _
-from requests import JSONDecodeError
 from sqlalchemy import text, func, desc
 from sqlalchemy.exc import IntegrityError
 
@@ -29,7 +28,7 @@ import pytesseract
 from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
     is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \
     shorten_string, remove_tracking_from_link, \
-    microblog_content_to_title, generate_image_from_video_url, is_video_url, \
+    microblog_content_to_title, is_video_url, \
     notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
     html_to_text, add_to_modlog_activitypub, joined_communities, \
     moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse
@@ -1009,148 +1008,106 @@ def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory, to
     session = get_task_session()
     file: File = session.query(File).get(file_id)
     if file and file.source_url:
-        # Videos (old code. not invoked because file.source_url won't end .mp4 or .webm)
-        if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'):
-            new_filename = gibberish(15)
-
-            # set up the storage directory
-            directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
-            ensure_directory_exists(directory)
-
-            # file path and names to store the resized images on disk
-            final_place = os.path.join(directory, new_filename + '.jpg')
-            final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
-            try:
-                generate_image_from_video_url(file.source_url, final_place)
-            except Exception as e:
-                return
-
-            if final_place:
-                image = Image.open(final_place)
-                img_width = image.width
-
-                # Resize the image to medium
-                if medium_width:
-                    if img_width > medium_width:
-                        image.thumbnail((medium_width, medium_width))
-                    image.save(final_place)
-                    file.file_path = final_place
-                    file.width = image.width
-                    file.height = image.height
-
-                # Resize the image to a thumbnail (webp)
-                if thumbnail_width:
-                    if img_width > thumbnail_width:
-                        image.thumbnail((thumbnail_width, thumbnail_width))
-                    image.save(final_place_thumbnail, format="WebP", quality=93)
-                    file.thumbnail_path = final_place_thumbnail
-                    file.thumbnail_width = image.width
-                    file.thumbnail_height = image.height
-
-                session.commit()
-
-        # Images
+        try:
+            source_image_response = get_request(file.source_url)
+        except:
+            pass
         else:
-            try:
-                source_image_response = get_request(file.source_url)
-            except:
-                pass
-            else:
-                if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url:
-                    source_image_response.close()
-                    # Lemmy failed to retrieve the image but we might have better luck. Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63
-                    # The un-proxied image url is the query parameter called 'url'
-                    parsed_url = urlparse(file.source_url)
-                    query_params = parse_qs(parsed_url.query)
-                    if 'url' in query_params:
-                        url_value = query_params['url'][0]
-                        source_image_response = get_request(url_value)
-                    else:
-                        source_image_response = None
-                if source_image_response and source_image_response.status_code == 200:
-                    content_type = source_image_response.headers.get('content-type')
-                    if content_type:
-                        if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')):
-                            source_image = source_image_response.content
-                            source_image_response.close()
+            if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url:
+                source_image_response.close()
+                # Lemmy failed to retrieve the image but we might have better luck. Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63
+                # The un-proxied image url is the query parameter called 'url'
+                parsed_url = urlparse(file.source_url)
+                query_params = parse_qs(parsed_url.query)
+                if 'url' in query_params:
+                    url_value = query_params['url'][0]
+                    source_image_response = get_request(url_value)
+                else:
+                    source_image_response = None
+            if source_image_response and source_image_response.status_code == 200:
+                content_type = source_image_response.headers.get('content-type')
+                if content_type:
+                    if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')):
+                        source_image = source_image_response.content
+                        source_image_response.close()
 
-                            content_type_parts = content_type.split('/')
-                            if content_type_parts:
-                                # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8'
+                        content_type_parts = content_type.split('/')
+                        if content_type_parts:
+                            # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8'
 
-                                # Remove ;charset=whatever
-                                main_part = content_type.split(';')[0]
+                            # Remove ;charset=whatever
+                            main_part = content_type.split(';')[0]
 
-                                # Split the main part on the '/' character and take the second part
-                                file_ext = '.' + main_part.split('/')[1]
-                                file_ext = file_ext.strip() # just to be sure
+                            # Split the main part on the '/' character and take the second part
+                            file_ext = '.' + main_part.split('/')[1]
+                            file_ext = file_ext.strip() # just to be sure
 
-                                if file_ext == '.jpeg':
-                                    file_ext = '.jpg'
-                                elif file_ext == '.svg+xml':
-                                    return  # no need to resize SVG images
-                                elif file_ext == '.octet-stream':
-                                    file_ext = '.avif'
-                            else:
-                                file_ext = os.path.splitext(file.source_url)[1]
-                                file_ext = file_ext.replace('%3f', '?')  # sometimes urls are not decoded properly
-                                if '?' in file_ext:
-                                    file_ext = file_ext.split('?')[0]
+                            if file_ext == '.jpeg':
+                                file_ext = '.jpg'
+                            elif file_ext == '.svg+xml':
+                                return  # no need to resize SVG images
+                            elif file_ext == '.octet-stream':
+                                file_ext = '.avif'
+                        else:
+                            file_ext = os.path.splitext(file.source_url)[1]
+                            file_ext = file_ext.replace('%3f', '?')  # sometimes urls are not decoded properly
+                            if '?' in file_ext:
+                                file_ext = file_ext.split('?')[0]
 
-                            new_filename = gibberish(15)
+                        new_filename = gibberish(15)
 
-                            # set up the storage directory
-                            directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
-                            ensure_directory_exists(directory)
+                        # set up the storage directory
+                        directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
+                        ensure_directory_exists(directory)
 
-                            # file path and names to store the resized images on disk
-                            final_place = os.path.join(directory, new_filename + file_ext)
-                            final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
+                        # file path and names to store the resized images on disk
+                        final_place = os.path.join(directory, new_filename + file_ext)
+                        final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
 
-                            if file_ext == '.avif': # this is quite a big plugin so we'll only load it if necessary
-                                import pillow_avif
+                        if file_ext == '.avif': # this is quite a big plugin so we'll only load it if necessary
+                            import pillow_avif
 
-                            # Load image data into Pillow
-                            Image.MAX_IMAGE_PIXELS = 89478485
-                            image = Image.open(BytesIO(source_image))
-                            image = ImageOps.exif_transpose(image)
-                            img_width = image.width
-                            img_height = image.height
+                        # Load image data into Pillow
+                        Image.MAX_IMAGE_PIXELS = 89478485
+                        image = Image.open(BytesIO(source_image))
+                        image = ImageOps.exif_transpose(image)
+                        img_width = image.width
+                        img_height = image.height
 
-                            # Resize the image to medium
-                            if medium_width:
-                                if img_width > medium_width:
-                                    image.thumbnail((medium_width, medium_width))
-                                image.save(final_place)
-                                file.file_path = final_place
-                                file.width = image.width
-                                file.height = image.height
+                        # Resize the image to medium
+                        if medium_width:
+                            if img_width > medium_width:
+                                image.thumbnail((medium_width, medium_width))
+                            image.save(final_place)
+                            file.file_path = final_place
+                            file.width = image.width
+                            file.height = image.height
 
-                            # Resize the image to a thumbnail (webp)
-                            if thumbnail_width:
-                                if img_width > thumbnail_width:
-                                    image.thumbnail((thumbnail_width, thumbnail_width))
-                                image.save(final_place_thumbnail, format="WebP", quality=93)
-                                file.thumbnail_path = final_place_thumbnail
-                                file.thumbnail_width = image.width
-                                file.thumbnail_height = image.height
+                        # Resize the image to a thumbnail (webp)
+                        if thumbnail_width:
+                            if img_width > thumbnail_width:
+                                image.thumbnail((thumbnail_width, thumbnail_width))
+                            image.save(final_place_thumbnail, format="WebP", quality=93)
+                            file.thumbnail_path = final_place_thumbnail
+                            file.thumbnail_width = image.width
+                            file.thumbnail_height = image.height
 
-                            session.commit()
+                        session.commit()
 
-                            # Alert regarding fascist meme content
-                            if toxic_community and img_width < 2000:    # images > 2000px tend to be real photos instead of 4chan screenshots.
-                                try:
-                                    image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
-                                except Exception as e:
-                                    image_text = ''
-                                if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text):   # chan posts usually contain the text 'Anonymous' and ' No.12345'
-                                    post = Post.query.filter_by(image_id=file.id).first()
-                                    notification = Notification(title='Review this',
-                                                                user_id=1,
-                                                                author_id=post.user_id,
-                                                                url=url_for('activitypub.post_ap', post_id=post.id))
-                                    session.add(notification)
-                                    session.commit()
+                        # Alert regarding fascist meme content
+                        if toxic_community and img_width < 2000:    # images > 2000px tend to be real photos instead of 4chan screenshots.
+                            try:
+                                image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
+                            except Exception as e:
+                                image_text = ''
+                            if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text):   # chan posts usually contain the text 'Anonymous' and ' No.12345'
+                                post = Post.query.filter_by(image_id=file.id).first()
+                                notification = Notification(title='Review this',
+                                                            user_id=1,
+                                                            author_id=post.user_id,
+                                                            url=url_for('activitypub.post_ap', post_id=post.id))
+                                session.add(notification)
+                                session.commit()
 
 
 def find_reply_parent(in_reply_to: str) -> Tuple[int, int, int]:
diff --git a/app/models.py b/app/models.py
index ceae4a4a..1c8c3f26 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1364,7 +1364,7 @@ class Post(db.Model):
                     i += 1
                 db.session.commit()
 
-            if post.image_id:
+            if post.image_id and not post.type == constants.POST_TYPE_VIDEO:
                 make_image_sizes(post.image_id, 170, 512, 'posts',
                                  community.low_quality)  # the 512 sized image is for masonry view
 
diff --git a/app/utils.py b/app/utils.py
index 14bfdc06..45b50df1 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -4,7 +4,6 @@ import bisect
 import hashlib
 import mimetypes
 import random
-import tempfile
 import urllib
 from collections import defaultdict
 from datetime import datetime, timedelta, date
@@ -13,11 +12,9 @@ from typing import List, Literal, Union
 
 import httpx
 import markdown2
-import math
 from urllib.parse import urlparse, parse_qs, urlencode
 from functools import wraps
 import flask
-import requests
 from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 import warnings
 import jwt
@@ -34,7 +31,6 @@ from wtforms.fields  import SelectField, SelectMultipleField
 from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
 from app import db, cache, httpx_client
 import re
-from moviepy.editor import VideoFileClip
 from PIL import Image, ImageOps
 
 from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
@@ -1109,49 +1105,6 @@ def in_sorted_list(arr, target):
     return index < len(arr) and arr[index] == target
 
 
-# Makes a still image from a video url, without downloading the whole video file
-def generate_image_from_video_url(video_url, output_path, length=2):
-
-    response = requests.get(video_url, stream=True, timeout=5,
-                            headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0'})  # Imgur requires a user agent
-    content_type = response.headers.get('Content-Type')
-    if content_type:
-        if 'video/mp4' in content_type:
-            temp_file_extension = '.mp4'
-        elif 'video/webm' in content_type:
-            temp_file_extension = '.webm'
-        else:
-            raise ValueError("Unsupported video format")
-    else:
-        raise ValueError("Content-Type not found in response headers")
-
-    # Generate a random temporary file name
-    temp_file_name = gibberish(15) + temp_file_extension
-    temp_file_path = os.path.join(tempfile.gettempdir(), temp_file_name)
-
-    # Write the downloaded data to a temporary file
-    with open(temp_file_path, 'wb') as f:
-        for chunk in response.iter_content(chunk_size=4096):
-            f.write(chunk)
-            if os.path.getsize(temp_file_path) >= length * 1024 * 1024:
-                break
-
-    # Generate thumbnail from the temporary file
-    try:
-        clip = VideoFileClip(temp_file_path)
-    except Exception as e:
-        os.unlink(temp_file_path)
-        raise e
-    thumbnail = clip.get_frame(0)
-    clip.close()
-
-    # Save the image
-    thumbnail_image = Image.fromarray(thumbnail)
-    thumbnail_image.save(output_path)
-
-    os.remove(temp_file_path)
-
-
 @cache.memoize(timeout=600)
 def recently_upvoted_posts(user_id) -> List[int]:
     post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'),
diff --git a/requirements.txt b/requirements.txt
index 3f0cbefd..d2a5bcca 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -32,4 +32,3 @@ Werkzeug==2.3.3
 pytesseract==0.3.10
 sentry-sdk==1.40.6
 python-slugify==8.0.4
-moviepy==1.0.3