From abd4dd16c9dad194e447d018ec14a3dc271007a6 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Tue, 16 Apr 2024 16:35:12 +1200 Subject: [PATCH] embed video and generate still thumbnail --- app/activitypub/util.py | 165 +++++++++++++++--------- app/community/util.py | 42 +++--- app/main/routes.py | 3 +- app/static/structure.css | 5 + app/static/structure.scss | 5 + app/templates/post/_post_full.html | 9 ++ app/utils.py | 43 +++++- docs/project_management/contributing.md | 5 +- requirements.txt | 1 + 9 files changed, 193 insertions(+), 85 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 1a463a79..277641bb 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -27,7 +27,7 @@ import pytesseract from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \ - blocked_phrases, microblog_content_to_title + blocked_phrases, microblog_content_to_title, generate_image_from_video_url def public_key(): @@ -738,78 +738,117 @@ def make_image_sizes(file_id, thumbnail_width=50, medium_width=120, directory='p def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory): file = File.query.get(file_id) if file and file.source_url: - try: - source_image_response = get_request(file.source_url) - except: - pass + # Videos + if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'): + new_filename = gibberish(15) + + # set up the storage directory + directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] + ensure_directory_exists(directory) + + # file path and names to store the resized images on disk + final_place = os.path.join(directory, new_filename + '.jpg') + final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') + + generate_image_from_video_url(file.source_url, final_place) + + image = Image.open(final_place) + img_width = image.width + + # Resize the image to medium + if medium_width: + if img_width > medium_width: + image.thumbnail((medium_width, medium_width)) + image.save(final_place) + file.file_path = final_place + file.width = image.width + file.height = image.height + + # Resize the image to a thumbnail (webp) + if thumbnail_width: + if img_width > thumbnail_width: + image.thumbnail((thumbnail_width, thumbnail_width)) + image.save(final_place_thumbnail, format="WebP", quality=93) + file.thumbnail_path = final_place_thumbnail + file.thumbnail_width = image.width + file.thumbnail_height = image.height + + db.session.commit() + + # Images else: - if source_image_response.status_code == 200: - content_type = source_image_response.headers.get('content-type') - if content_type and content_type.startswith('image'): - source_image = source_image_response.content - source_image_response.close() + try: + source_image_response = get_request(file.source_url) + except: + pass + else: + if source_image_response.status_code == 200: + content_type = source_image_response.headers.get('content-type') + if content_type and content_type.startswith('image'): + source_image = source_image_response.content + source_image_response.close() - file_ext = os.path.splitext(file.source_url)[1] - # fall back to parsing the http content type if the url does not contain a file extension - if file_ext == '': - content_type_parts = content_type.split('/') - if content_type_parts: - file_ext = '.' + content_type_parts[-1] - else: - if '?' in file_ext: - file_ext = file_ext.split('?')[0] + file_ext = os.path.splitext(file.source_url)[1] + # fall back to parsing the http content type if the url does not contain a file extension + if file_ext == '': + content_type_parts = content_type.split('/') + if content_type_parts: + file_ext = '.' + content_type_parts[-1] + else: + if '?' in file_ext: + file_ext = file_ext.split('?')[0] - new_filename = gibberish(15) + new_filename = gibberish(15) - # set up the storage directory - directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] - ensure_directory_exists(directory) + # set up the storage directory + directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] + ensure_directory_exists(directory) - # file path and names to store the resized images on disk - final_place = os.path.join(directory, new_filename + file_ext) - final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') + # file path and names to store the resized images on disk + final_place = os.path.join(directory, new_filename + file_ext) + final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') - # Load image data into Pillow - Image.MAX_IMAGE_PIXELS = 89478485 - image = Image.open(BytesIO(source_image)) - image = ImageOps.exif_transpose(image) - img_width = image.width - img_height = image.height + # Load image data into Pillow + Image.MAX_IMAGE_PIXELS = 89478485 + image = Image.open(BytesIO(source_image)) + image = ImageOps.exif_transpose(image) + img_width = image.width + img_height = image.height - # Resize the image to medium - if medium_width: - if img_width > medium_width: - image.thumbnail((medium_width, medium_width)) - image.save(final_place) - file.file_path = final_place - file.width = image.width - file.height = image.height + # Resize the image to medium + if medium_width: + if img_width > medium_width: + image.thumbnail((medium_width, medium_width)) + image.save(final_place) + file.file_path = final_place + file.width = image.width + file.height = image.height - # Resize the image to a thumbnail (webp) - if thumbnail_width: - if img_width > thumbnail_width: - image.thumbnail((thumbnail_width, thumbnail_width)) - image.save(final_place_thumbnail, format="WebP", quality=93) - file.thumbnail_path = final_place_thumbnail - file.thumbnail_width = image.width - file.thumbnail_height = image.height + # Resize the image to a thumbnail (webp) + if thumbnail_width: + if img_width > thumbnail_width: + image.thumbnail((thumbnail_width, thumbnail_width)) + image.save(final_place_thumbnail, format="WebP", quality=93) + file.thumbnail_path = final_place_thumbnail + file.thumbnail_width = image.width + file.thumbnail_height = image.height - db.session.commit() + db.session.commit() - # Alert regarding fascist meme content - if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots. - try: - image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30) - except FileNotFoundError as e: - image_text = '' - if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' - post = Post.query.filter_by(image_id=file.id).first() - notification = Notification(title='Review this', - user_id=1, - author_id=post.user_id, - url=url_for('activitypub.post_ap', post_id=post.id)) - db.session.add(notification) - db.session.commit() + # Alert regarding fascist meme content + if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots. + try: + image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30) + except FileNotFoundError as e: + image_text = '' + if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' + post = Post.query.filter_by(image_id=file.id).first() + notification = Notification(title='Review this', + user_id=1, + author_id=post.user_id, + url=url_for('activitypub.post_ap', post_id=post.id)) + db.session.add(notification) + db.session.commit() # create a summary from markdown if present, otherwise use html if available diff --git a/app/community/util.py b/app/community/util.py index c58dcad9..7b438594 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -112,7 +112,8 @@ def retrieve_mods_and_backfill(community_id: int): post.ranking = post_ranking(post.score, post.posted_at) if post.url: other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, - Post.posted_at > post.posted_at - timedelta(days=3), Post.posted_at < post.posted_at + timedelta(days=3)).all() + Post.posted_at > post.posted_at - timedelta(days=3), + Post.posted_at < post.posted_at + timedelta(days=3)).all() for op in other_posts: if op.cross_posts is None: op.cross_posts = [post.id] @@ -223,26 +224,31 @@ def save_post(form, post: Post, type: str): remove_old_file(post.image_id) post.image_id = None - unused, file_extension = os.path.splitext(form.link_url.data) - # this url is a link to an image - turn it into a image post - if file_extension.lower() in allowed_extensions: - file = File(source_url=form.link_url.data) + if post.url.endswith('.mp4') or post.url.endswith('.webm'): + file = File(source_url=form.link_url.data) # make_image_sizes() will take care of turning this into a still image post.image = file db.session.add(file) - post.type = POST_TYPE_IMAGE else: - # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag - opengraph = opengraph_parse(form.link_url.data) - if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): - filename = opengraph.get('og:image') or opengraph.get('og:image:url') - filename_for_extension = filename.split('?')[0] if '?' in filename else filename - unused, file_extension = os.path.splitext(filename_for_extension) - if file_extension.lower() in allowed_extensions and not filename.startswith('/'): - file = url_to_thumbnail_file(filename) - if file: - file.alt_text = shorten_string(opengraph.get('og:title'), 295) - post.image = file - db.session.add(file) + unused, file_extension = os.path.splitext(form.link_url.data) + # this url is a link to an image - turn it into a image post + if file_extension.lower() in allowed_extensions: + file = File(source_url=form.link_url.data) + post.image = file + db.session.add(file) + post.type = POST_TYPE_IMAGE + else: + # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag + opengraph = opengraph_parse(form.link_url.data) + if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): + filename = opengraph.get('og:image') or opengraph.get('og:image:url') + filename_for_extension = filename.split('?')[0] if '?' in filename else filename + unused, file_extension = os.path.splitext(filename_for_extension) + if file_extension.lower() in allowed_extensions and not filename.startswith('/'): + file = url_to_thumbnail_file(filename) + if file: + file.alt_text = shorten_string(opengraph.get('og:title'), 295) + post.image = file + db.session.add(file) elif type == 'image': post.title = form.image_title.data diff --git a/app/main/routes.py b/app/main/routes.py index d90f55d3..74bf30d7 100644 --- a/app/main/routes.py +++ b/app/main/routes.py @@ -25,7 +25,8 @@ from sqlalchemy_searchable import search from app.utils import render_template, get_setting, gibberish, request_etag_matches, return_304, blocked_domains, \ ap_datetime, ip_address, retrieve_block_list, shorten_string, markdown_to_text, user_filters_home, \ joined_communities, moderating_communities, parse_page, theme_list, get_request, markdown_to_html, allowlist_html, \ - blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts + blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts, \ + generate_image_from_video_url from app.models import Community, CommunityMember, Post, Site, User, utcnow, Domain, Topic, File, Instance, \ InstanceRole, Notification from PIL import Image diff --git a/app/static/structure.css b/app/static/structure.css index 64cbe713..b66f05f9 100644 --- a/app/static/structure.css +++ b/app/static/structure.css @@ -1384,4 +1384,9 @@ h1 .warning_badge { max-width: 100%; } +.responsive-video { + max-width: 100%; + max-height: 90vh; +} + /*# sourceMappingURL=structure.css.map */ diff --git a/app/static/structure.scss b/app/static/structure.scss index 8152ec92..79019d4f 100644 --- a/app/static/structure.scss +++ b/app/static/structure.scss @@ -1057,4 +1057,9 @@ h1 .warning_badge { line-height: initial; max-width: 100%; } +} + +.responsive-video { + max-width: 100%; + max-height: 90vh; } \ No newline at end of file diff --git a/app/templates/post/_post_full.html b/app/templates/post/_post_full.html index b0d1c618..891700f3 100644 --- a/app/templates/post/_post_full.html +++ b/app/templates/post/_post_full.html @@ -83,6 +83,15 @@
{% if post.url.endswith('.mp3') %} + {% elif post.url.endswith('.mp4') or post.url.endswith('.webm') %} ++
{% endif %} {% if 'youtube.com' in post.url %} diff --git a/app/utils.py b/app/utils.py index f7d427cf..c354b085 100644 --- a/app/utils.py +++ b/app/utils.py @@ -4,6 +4,7 @@ import bisect import hashlib import mimetypes import random +import tempfile import urllib from collections import defaultdict from datetime import datetime, timedelta, date @@ -14,7 +15,7 @@ import math from urllib.parse import urlparse, parse_qs, urlencode from functools import wraps import flask -from bs4 import BeautifulSoup, NavigableString, MarkupResemblesLocatorWarning +from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning import warnings warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) import requests @@ -26,6 +27,8 @@ from wtforms.fields import SelectField, SelectMultipleField from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput from app import db, cache import re +from moviepy.editor import VideoFileClip +from PIL import Image from app.email import send_welcome_email from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \ @@ -881,6 +884,44 @@ def in_sorted_list(arr, target): return index < len(arr) and arr[index] == target +# Makes a still image from a video url, without downloading the whole video file +def generate_image_from_video_url(video_url, output_path, length=2): + + response = requests.get(video_url, stream=True) + content_type = response.headers.get('Content-Type') + if content_type: + if 'video/mp4' in content_type: + temp_file_extension = '.mp4' + elif 'video/webm' in content_type: + temp_file_extension = '.webm' + else: + raise ValueError("Unsupported video format") + else: + raise ValueError("Content-Type not found in response headers") + + # Generate a random temporary file name + temp_file_name = gibberish(15) + temp_file_extension + temp_file_path = os.path.join(tempfile.gettempdir(), temp_file_name) + + # Write the downloaded data to a temporary file + with open(temp_file_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=4096): + f.write(chunk) + if os.path.getsize(temp_file_path) >= length * 1024 * 1024: + break + + # Generate thumbnail from the temporary file + clip = VideoFileClip(temp_file_path) + thumbnail = clip.get_frame(0) + clip.close() + + # Save the image + thumbnail_image = Image.fromarray(thumbnail) + thumbnail_image.save(output_path) + + os.remove(temp_file_path) + + @cache.memoize(timeout=600) def recently_upvoted_posts(user_id) -> List[int]: post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'), diff --git a/docs/project_management/contributing.md b/docs/project_management/contributing.md index f09ea3d3..92bfeff1 100644 --- a/docs/project_management/contributing.md +++ b/docs/project_management/contributing.md @@ -34,9 +34,10 @@ time of things. # Coding Standards / Guidelines **[PEP 8](https://peps.python.org/pep-0008/)** covers the basics. PyCharm encourages this by default - -VS Code coders are encouraged to try the free community edition of PyCharm but it is by no means required. +VS Code coders may like to try the free community edition of PyCharm but it is by no means required. -Use PEP 8 conventions for line length, naming, indentation. Use descriptive commit messages. +Use PEP 8 conventions for naming, indentation. Use descriptive commit messages. Try to limit lines of code +to a length of roughly 120 characters. Database model classes are singular. As in "Car", not "Cars". diff --git a/requirements.txt b/requirements.txt index fad9be58..338bdd49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,3 +32,4 @@ Werkzeug==2.3.3 pytesseract==0.3.10 sentry-sdk==1.40.6 python-slugify==8.0.4 +moviepy==1.0.3