mirror of https://codeberg.org/rimu/pyfedi
synced 2025-01-23 11:26:56 -08:00
remove moviepy and ffmpeg dependency
commit d73c12d4c7 (parent 40777cd390)
5 changed files with 89 additions and 180 deletions
@@ -4,7 +4,7 @@ FROM --platform=$BUILDPLATFORM python:3-alpine AS builder
 RUN apk update
 RUN apk add pkgconfig
-RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng ffmpeg
+RUN apk add --virtual build-deps gcc python3-dev musl-dev tesseract-ocr tesseract-ocr-data-eng

 WORKDIR /app
 COPY . /app
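Note: with ffmpeg gone from the build stage, media handling rests entirely on Pillow (plus pillow-avif-plugin and pytesseract). A minimal smoke test along these lines — hypothetical, not part of this commit — could confirm the slimmer image still has the codecs the resize pipeline uses:

    # Hypothetical smoke test (not in this commit): verify the Pillow build in the
    # slimmed-down image still supports the formats make_image_sizes_async() handles.
    from PIL import Image, features

    import pillow_avif  # noqa: F401 -- importing registers the AVIF codec

    assert features.check('webp'), 'WebP support missing from this Pillow build'
    assert '.avif' in Image.registered_extensions(), 'AVIF plugin did not register'
    print('image codecs OK')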
@@ -10,7 +10,6 @@ import httpx
 import redis
 from flask import current_app, request, g, url_for, json
 from flask_babel import _
-from requests import JSONDecodeError
 from sqlalchemy import text, func, desc
 from sqlalchemy.exc import IntegrityError

@@ -29,7 +28,7 @@ import pytesseract
 from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
     is_image_url, domain_from_url, gibberish, ensure_directory_exists, head_request, \
     shorten_string, remove_tracking_from_link, \
-    microblog_content_to_title, generate_image_from_video_url, is_video_url, \
+    microblog_content_to_title, is_video_url, \
     notification_subscribers, communities_banned_from, actor_contains_blocked_words, \
     html_to_text, add_to_modlog_activitypub, joined_communities, \
     moderating_communities, get_task_session, is_video_hosting_site, opengraph_parse
@@ -1009,148 +1008,106 @@ def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory, toxic_community):
     session = get_task_session()
     file: File = session.query(File).get(file_id)
     if file and file.source_url:
-        # Videos (old code. not invoked because file.source_url won't end .mp4 or .webm)
-        if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'):
-            new_filename = gibberish(15)
-
-            # set up the storage directory
-            directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
-            ensure_directory_exists(directory)
-
-            # file path and names to store the resized images on disk
-            final_place = os.path.join(directory, new_filename + '.jpg')
-            final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
-            try:
-                generate_image_from_video_url(file.source_url, final_place)
-            except Exception as e:
-                return
-
-            if final_place:
-                image = Image.open(final_place)
-                img_width = image.width
-
-                # Resize the image to medium
-                if medium_width:
-                    if img_width > medium_width:
-                        image.thumbnail((medium_width, medium_width))
-                    image.save(final_place)
-                    file.file_path = final_place
-                    file.width = image.width
-                    file.height = image.height
-
-                # Resize the image to a thumbnail (webp)
-                if thumbnail_width:
-                    if img_width > thumbnail_width:
-                        image.thumbnail((thumbnail_width, thumbnail_width))
-                    image.save(final_place_thumbnail, format="WebP", quality=93)
-                    file.thumbnail_path = final_place_thumbnail
-                    file.thumbnail_width = image.width
-                    file.thumbnail_height = image.height
-
-                session.commit()
-
-        # Images
+        try:
+            source_image_response = get_request(file.source_url)
+        except:
+            pass
         else:
-            try:
-                source_image_response = get_request(file.source_url)
-            except:
-                pass
-            else:
-                if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url:
-                    source_image_response.close()
-                    # Lemmy failed to retrieve the image but we might have better luck. Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63
-                    # The un-proxied image url is the query parameter called 'url'
-                    parsed_url = urlparse(file.source_url)
-                    query_params = parse_qs(parsed_url.query)
-                    if 'url' in query_params:
-                        url_value = query_params['url'][0]
-                        source_image_response = get_request(url_value)
-                    else:
-                        source_image_response = None
-                if source_image_response and source_image_response.status_code == 200:
-                    content_type = source_image_response.headers.get('content-type')
-                    if content_type:
-                        if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')):
-                            source_image = source_image_response.content
-                            source_image_response.close()
+            if source_image_response.status_code == 404 and '/api/v3/image_proxy' in file.source_url:
+                source_image_response.close()
+                # Lemmy failed to retrieve the image but we might have better luck. Example source_url: https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fi.guim.co.uk%2Fimg%2Fmedia%2F24e87cb4d730141848c339b3b862691ca536fb26%2F0_164_3385_2031%2Fmaster%2F3385.jpg%3Fwidth%3D1200%26height%3D630%26quality%3D85%26auto%3Dformat%26fit%3Dcrop%26overlay-align%3Dbottom%252Cleft%26overlay-width%3D100p%26overlay-base64%3DL2ltZy9zdGF0aWMvb3ZlcmxheXMvdGctZGVmYXVsdC5wbmc%26enable%3Dupscale%26s%3D0ec9d25a8cb5db9420471054e26cfa63
+                # The un-proxied image url is the query parameter called 'url'
+                parsed_url = urlparse(file.source_url)
+                query_params = parse_qs(parsed_url.query)
+                if 'url' in query_params:
+                    url_value = query_params['url'][0]
+                    source_image_response = get_request(url_value)
+                else:
+                    source_image_response = None
+            if source_image_response and source_image_response.status_code == 200:
+                content_type = source_image_response.headers.get('content-type')
+                if content_type:
+                    if content_type.startswith('image') or (content_type == 'application/octet-stream' and file.source_url.endswith('.avif')):
+                        source_image = source_image_response.content
+                        source_image_response.close()

                         content_type_parts = content_type.split('/')
                         if content_type_parts:
                             # content type headers often are just 'image/jpeg' but sometimes 'image/jpeg;charset=utf8'

                             # Remove ;charset=whatever
                             main_part = content_type.split(';')[0]

                             # Split the main part on the '/' character and take the second part
                             file_ext = '.' + main_part.split('/')[1]
                             file_ext = file_ext.strip()  # just to be sure

                             if file_ext == '.jpeg':
                                 file_ext = '.jpg'
                             elif file_ext == '.svg+xml':
                                 return  # no need to resize SVG images
                             elif file_ext == '.octet-stream':
                                 file_ext = '.avif'
                         else:
                             file_ext = os.path.splitext(file.source_url)[1]
                             file_ext = file_ext.replace('%3f', '?')  # sometimes urls are not decoded properly
                             if '?' in file_ext:
                                 file_ext = file_ext.split('?')[0]

                         new_filename = gibberish(15)

                         # set up the storage directory
                         directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
                         ensure_directory_exists(directory)

                         # file path and names to store the resized images on disk
                         final_place = os.path.join(directory, new_filename + file_ext)
                         final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')

                         if file_ext == '.avif':  # this is quite a big plugin so we'll only load it if necessary
                             import pillow_avif

                         # Load image data into Pillow
                         Image.MAX_IMAGE_PIXELS = 89478485
                         image = Image.open(BytesIO(source_image))
                         image = ImageOps.exif_transpose(image)
                         img_width = image.width
                         img_height = image.height

                         # Resize the image to medium
                         if medium_width:
                             if img_width > medium_width:
                                 image.thumbnail((medium_width, medium_width))
                             image.save(final_place)
                             file.file_path = final_place
                             file.width = image.width
                             file.height = image.height

                         # Resize the image to a thumbnail (webp)
                         if thumbnail_width:
                             if img_width > thumbnail_width:
                                 image.thumbnail((thumbnail_width, thumbnail_width))
                             image.save(final_place_thumbnail, format="WebP", quality=93)
                             file.thumbnail_path = final_place_thumbnail
                             file.thumbnail_width = image.width
                             file.thumbnail_height = image.height

                         session.commit()

                         # Alert regarding fascist meme content
                         if toxic_community and img_width < 2000:  # images > 2000px tend to be real photos instead of 4chan screenshots.
                             try:
                                 image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
                             except Exception as e:
                                 image_text = ''
                             if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text):  # chan posts usually contain the text 'Anonymous' and ' No.12345'
                                 post = Post.query.filter_by(image_id=file.id).first()
                                 notification = Notification(title='Review this',
                                                             user_id=1,
                                                             author_id=post.user_id,
                                                             url=url_for('activitypub.post_ap', post_id=post.id))
                                 session.add(notification)
                                 session.commit()


 def find_reply_parent(in_reply_to: str) -> Tuple[int, int, int]:
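Note: the Lemmy fallback survives the refactor — when a proxied image 404s, the original URL is recovered from the proxy URL's 'url' query parameter. A standalone sketch of just that step, using the same stdlib calls (the slrpnk.net example URL is shortened here for illustration):

    # Sketch of the un-proxying step, isolated from the app; parse_qs also
    # percent-decodes the value, so the nested URL comes back ready to fetch.
    from urllib.parse import urlparse, parse_qs

    proxied = 'https://slrpnk.net/api/v3/image_proxy?url=https%3A%2F%2Fexample.com%2Fphoto.jpg'
    query_params = parse_qs(urlparse(proxied).query)
    url_value = query_params['url'][0] if 'url' in query_params else None
    print(url_value)  # -> https://example.com/photo.jpg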
@@ -1364,7 +1364,7 @@ class Post(db.Model):
                 i += 1
             db.session.commit()

-            if post.image_id:
+            if post.image_id and not post.type == constants.POST_TYPE_VIDEO:
                 make_image_sizes(post.image_id, 170, 512, 'posts',
                                  community.low_quality)  # the 512 sized image is for masonry view

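Note: since video posts no longer get a generated still image, the guard above keeps them out of make_image_sizes() entirely. A condensed, hypothetical view of the new dispatch (the real constant lives in PieFed's constants module; the value below is illustrative):

    # Hypothetical condensation of the guard added above.
    POST_TYPE_VIDEO = 3  # illustrative value only

    def wants_image_sizes(image_id, post_type) -> bool:
        # Only posts with an attached image that are not video posts get resized.
        return bool(image_id) and post_type != POST_TYPE_VIDEO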
app/utils.py (47 changed lines)
@@ -4,7 +4,6 @@ import bisect
 import hashlib
 import mimetypes
 import random
-import tempfile
 import urllib
 from collections import defaultdict
 from datetime import datetime, timedelta, date
@@ -13,11 +12,9 @@ from typing import List, Literal, Union

 import httpx
 import markdown2
-import math
 from urllib.parse import urlparse, parse_qs, urlencode
 from functools import wraps
 import flask
-import requests
 from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 import warnings
 import jwt
@@ -34,7 +31,6 @@ from wtforms.fields import SelectField, SelectMultipleField
 from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
 from app import db, cache, httpx_client
 import re
-from moviepy.editor import VideoFileClip
 from PIL import Image, ImageOps

 from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
@@ -1109,49 +1105,6 @@ def in_sorted_list(arr, target):
     return index < len(arr) and arr[index] == target


-# Makes a still image from a video url, without downloading the whole video file
-def generate_image_from_video_url(video_url, output_path, length=2):
-
-    response = requests.get(video_url, stream=True, timeout=5,
-                            headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0'})  # Imgur requires a user agent
-    content_type = response.headers.get('Content-Type')
-    if content_type:
-        if 'video/mp4' in content_type:
-            temp_file_extension = '.mp4'
-        elif 'video/webm' in content_type:
-            temp_file_extension = '.webm'
-        else:
-            raise ValueError("Unsupported video format")
-    else:
-        raise ValueError("Content-Type not found in response headers")
-
-    # Generate a random temporary file name
-    temp_file_name = gibberish(15) + temp_file_extension
-    temp_file_path = os.path.join(tempfile.gettempdir(), temp_file_name)
-
-    # Write the downloaded data to a temporary file
-    with open(temp_file_path, 'wb') as f:
-        for chunk in response.iter_content(chunk_size=4096):
-            f.write(chunk)
-            if os.path.getsize(temp_file_path) >= length * 1024 * 1024:
-                break
-
-    # Generate thumbnail from the temporary file
-    try:
-        clip = VideoFileClip(temp_file_path)
-    except Exception as e:
-        os.unlink(temp_file_path)
-        raise e
-    thumbnail = clip.get_frame(0)
-    clip.close()
-
-    # Save the image
-    thumbnail_image = Image.fromarray(thumbnail)
-    thumbnail_image.save(output_path)
-
-    os.remove(temp_file_path)
-
-
 @cache.memoize(timeout=600)
 def recently_upvoted_posts(user_id) -> List[int]:
     post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'),
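Note: the one reusable idea in the deleted helper is how it avoided downloading whole videos — stream the response and stop after roughly `length` MB. Isolated as a sketch with hypothetical names, using the same requests streaming API the removed code used:

    # Sketch of the partial-download trick from the removed helper: stream a
    # remote file and keep only the first max_bytes, never fetching the rest.
    import os
    import tempfile

    import requests

    def fetch_head(url: str, max_bytes: int = 2 * 1024 * 1024) -> str:
        response = requests.get(url, stream=True, timeout=5)
        fd, temp_path = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as f:
            for chunk in response.iter_content(chunk_size=4096):
                f.write(chunk)
                if f.tell() >= max_bytes:
                    break  # enough bytes for a poster frame; abandon the rest
        response.close()
        return temp_path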
@@ -32,4 +32,3 @@ Werkzeug==2.3.3
 pytesseract==0.3.10
 sentry-sdk==1.40.6
 python-slugify==8.0.4
-moviepy==1.0.3