Remove moviepy and ffmpeg dependencies

This commit is contained in:
rimu 2024-12-16 20:49:43 +13:00
parent 40777cd390
commit d73c12d4c7
5 changed files with 89 additions and 180 deletions

View file

@ -4,7 +4,7 @@ FROM --platform=$BUILDPLATFORM python:3-alpine AS builder
RUN apk update RUN apk update
RUN apk add pkgconfig RUN apk add pkgconfig
RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng ffmpeg RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng
WORKDIR /app WORKDIR /app
COPY . /app COPY . /app

View file

@ -10,7 +10,6 @@ import httpx
import redis import redis
from flask import current_app, request, g, url_for, json from flask import current_app, request, g, url_for, json
from flask_babel import _ from flask_babel import _
from requests import JSONDecodeError
from sqlalchemy import text, func, desc from sqlalchemy import text, func, desc
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
@ -29,7 +28,7 @@ import pytesseract
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \
shorten_string, remove_tracking_from_link, \ shorten_string, remove_tracking_from_link, \
microblog_content_to_title, generate_image_from_video_url, is_video_url, \ microblog_content_to_title, is_video_url, \
notification_subscribers, communities_banned_from, actor_contains_blocked_words, \ notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
html_to_text, add_to_modlog_activitypub, joined_communities, \ html_to_text, add_to_modlog_activitypub, joined_communities, \
moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse
@ -1009,148 +1008,106 @@ def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory, to
session = get_task_session() session = get_task_session()
file: File = session.query(File).get(file_id) file: File = session.query(File).get(file_id)
if file and file.source_url: if file and file.source_url:
# Videos (old code. not invoked because file.source_url won't end .mp4 or .webm) try:
if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'): source_image_response = get_request(file.source_url)
new_filename = gibberish(15) except:
pass
# set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
# file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + '.jpg')
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
try:
generate_image_from_video_url(file.source_url, final_place)
except Exception as e:
return
if final_place:
image = Image.open(final_place)
img_width = image.width
# Resize the image to medium
if medium_width:
if img_width > medium_width:
image.thumbnail((medium_width, medium_width))
image.save(final_place)
file.file_path = final_place
file.width = image.width
file.height = image.height
# Resize the image to a thumbnail (webp)
if thumbnail_width:
if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width
file.thumbnail_height = image.height
session.commit()
# Images
else: else:
try: if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url:
source_image_response = get_request(file.source_url) source_image_response.close()
except: # Lemmy failed to retrieve the image but we might have better luck. Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63
pass # The un-proxied image url is the query parameter called 'url'
else: parsed_url = urlparse(file.source_url)
if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url: query_params = parse_qs(parsed_url.query)
source_image_response.close() if 'url' in query_params:
# Lemmy failed to retrieve the image but we might have better luck. Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63 url_value = query_params['url'][0]
# The un-proxied image url is the query parameter called 'url' source_image_response = get_request(url_value)
parsed_url = urlparse(file.source_url) else:
query_params = parse_qs(parsed_url.query) source_image_response = None
if 'url' in query_params: if source_image_response and source_image_response.status_code == 200:
url_value = query_params['url'][0] content_type = source_image_response.headers.get('content-type')
source_image_response = get_request(url_value) if content_type:
else: if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')):
source_image_response = None source_image = source_image_response.content
if source_image_response and source_image_response.status_code == 200: source_image_response.close()
content_type = source_image_response.headers.get('content-type')
if content_type:
if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')):
source_image = source_image_response.content
source_image_response.close()
content_type_parts = content_type.split('/') content_type_parts = content_type.split('/')
if content_type_parts: if content_type_parts:
# content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8' # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8'
# Remove ;charset=whatever # Remove ;charset=whatever
main_part = content_type.split(';')[0] main_part = content_type.split(';')[0]
# Split the main part on the '/' character and take the second part # Split the main part on the '/' character and take the second part
file_ext = '.' + main_part.split('/')[1] file_ext = '.' + main_part.split('/')[1]
file_ext = file_ext.strip() # just to be sure file_ext = file_ext.strip() # just to be sure
if file_ext == '.jpeg': if file_ext == '.jpeg':
file_ext = '.jpg' file_ext = '.jpg'
elif file_ext == '.svg+xml': elif file_ext == '.svg+xml':
return # no need to resize SVG images return # no need to resize SVG images
elif file_ext == '.octet-stream': elif file_ext == '.octet-stream':
file_ext = '.avif' file_ext = '.avif'
else: else:
file_ext = os.path.splitext(file.source_url)[1] file_ext = os.path.splitext(file.source_url)[1]
file_ext = file_ext.replace('%3f', '?') # sometimes urls are not decoded properly file_ext = file_ext.replace('%3f', '?') # sometimes urls are not decoded properly
if '?' in file_ext: if '?' in file_ext:
file_ext = file_ext.split('?')[0] file_ext = file_ext.split('?')[0]
new_filename = gibberish(15) new_filename = gibberish(15)
# set up the storage directory # set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory) ensure_directory_exists(directory)
# file path and names to store the resized images on disk # file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + file_ext) final_place = os.path.join(directory, new_filename + file_ext)
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
if file_ext == '.avif': # this is quite a big plugin so we'll only load it if necessary if file_ext == '.avif': # this is quite a big plugin so we'll only load it if necessary
import pillow_avif import pillow_avif
# Load image data into Pillow # Load image data into Pillow
Image.MAX_IMAGE_PIXELS = 89478485 Image.MAX_IMAGE_PIXELS = 89478485
image = Image.open(BytesIO(source_image)) image = Image.open(BytesIO(source_image))
image = ImageOps.exif_transpose(image) image = ImageOps.exif_transpose(image)
img_width = image.width img_width = image.width
img_height = image.height img_height = image.height
# Resize the image to medium # Resize the image to medium
if medium_width: if medium_width:
if img_width > medium_width: if img_width > medium_width:
image.thumbnail((medium_width, medium_width)) image.thumbnail((medium_width, medium_width))
image.save(final_place) image.save(final_place)
file.file_path = final_place file.file_path = final_place
file.width = image.width file.width = image.width
file.height = image.height file.height = image.height
# Resize the image to a thumbnail (webp) # Resize the image to a thumbnail (webp)
if thumbnail_width: if thumbnail_width:
if img_width > thumbnail_width: if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width)) image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93) image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width file.thumbnail_width = image.width
file.thumbnail_height = image.height file.thumbnail_height = image.height
session.commit() session.commit()
# Alert regarding fascist meme content # Alert regarding fascist meme content
if toxic_community and img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots. if toxic_community and img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots.
try: try:
image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30) image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
except Exception as e: except Exception as e:
image_text = '' image_text = ''
if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345'
post = Post.query.filter_by(image_id=file.id).first() post = Post.query.filter_by(image_id=file.id).first()
notification = Notification(title='Review this', notification = Notification(title='Review this',
user_id=1, user_id=1,
author_id=post.user_id, author_id=post.user_id,
url=url_for('activitypub.post_ap', post_id=post.id)) url=url_for('activitypub.post_ap', post_id=post.id))
session.add(notification) session.add(notification)
session.commit() session.commit()
def find_reply_parent(in_reply_to: str) -> Tuple[int, int, int]: def find_reply_parent(in_reply_to: str) -> Tuple[int, int, int]:

View file

@ -1364,7 +1364,7 @@ class Post(db.Model):
i += 1 i += 1
db.session.commit() db.session.commit()
if post.image_id: if post.image_id and not post.type == constants.POST_TYPE_VIDEO:
make_image_sizes(post.image_id, 170, 512, 'posts', make_image_sizes(post.image_id, 170, 512, 'posts',
community.low_quality) # the 512 sized image is for masonry view community.low_quality) # the 512 sized image is for masonry view

View file

@ -4,7 +4,6 @@ import bisect
import hashlib import hashlib
import mimetypes import mimetypes
import random import random
import tempfile
import urllib import urllib
from collections import defaultdict from collections import defaultdict
from datetime import datetime, timedelta, date from datetime import datetime, timedelta, date
@ -13,11 +12,9 @@ from typing import List, Literal, Union
import httpx import httpx
import markdown2 import markdown2
import math
from urllib.parse import urlparse, parse_qs, urlencode from urllib.parse import urlparse, parse_qs, urlencode
from functools import wraps from functools import wraps
import flask import flask
import requests
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
import warnings import warnings
import jwt import jwt
@ -34,7 +31,6 @@ from wtforms.fields import SelectField, SelectMultipleField
from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
from app import db, cache, httpx_client from app import db, cache, httpx_client
import re import re
from moviepy.editor import VideoFileClip
from PIL import Image, ImageOps from PIL import Image, ImageOps
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \ from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
@ -1109,49 +1105,6 @@ def in_sorted_list(arr, target):
return index < len(arr) and arr[index] == target return index < len(arr) and arr[index] == target
def generate_image_from_video_url(video_url, output_path, length=2):
    """Save the first frame of a remote video as a still image.

    Only the start of the video is downloaded (roughly ``length`` MiB) into a
    temporary file, which is then opened with ``VideoFileClip`` to grab the
    frame at t=0. The temporary file is always removed before returning,
    whether or not frame extraction succeeded.

    :param video_url: URL of the video to fetch.
    :param output_path: path handed to ``Image.save`` for the extracted frame.
    :param length: download cap in MiB; streaming stops once this many bytes
        have been written.
    :raises ValueError: if the response has no Content-Type header, or the
        Content-Type is neither ``video/mp4`` nor ``video/webm``.

    Any exception raised while fetching, decoding or saving propagates to the
    caller unchanged.
    """
    max_bytes = length * 1024 * 1024
    temp_file_path = None
    try:
        # The context manager guarantees the streamed connection is released,
        # including when we stop reading early or reject the content type.
        with requests.get(video_url, stream=True, timeout=5,
                          headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0'}) as response:  # Imgur requires a user agent
            content_type = response.headers.get('Content-Type')
            if not content_type:
                raise ValueError("Content-Type not found in response headers")
            if 'video/mp4' in content_type:
                temp_file_extension = '.mp4'
            elif 'video/webm' in content_type:
                temp_file_extension = '.webm'
            else:
                raise ValueError("Unsupported video format")

            # Generate a random temporary file name
            temp_file_path = os.path.join(tempfile.gettempdir(), gibberish(15) + temp_file_extension)

            # Write the downloaded data to the temporary file. Bytes are counted
            # in memory rather than stat()-ing the file after every chunk, which
            # was both slow and inaccurate while writes were still buffered.
            bytes_written = 0
            with open(temp_file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=4096):
                    f.write(chunk)
                    bytes_written += len(chunk)
                    if bytes_written >= max_bytes:
                        break

        # Generate thumbnail from the temporary file
        clip = VideoFileClip(temp_file_path)
        try:
            thumbnail = clip.get_frame(0)
        finally:
            clip.close()

        # Save the image
        Image.fromarray(thumbnail).save(output_path)
    finally:
        # Clean up on every path, not only on success or VideoFileClip failure
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
@cache.memoize(timeout=600) @cache.memoize(timeout=600)
def recently_upvoted_posts(user_id) -> List[int]: def recently_upvoted_posts(user_id) -> List[int]:
post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'), post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'),

View file

@ -32,4 +32,3 @@ Werkzeug==2.3.3
pytesseract==0.3.10 pytesseract==0.3.10
sentry-sdk==1.40.6 sentry-sdk==1.40.6
python-slugify==8.0.4 python-slugify==8.0.4
moviepy==1.0.3