embed video and generate still thumbnail

This commit is contained in:
rimu 2024-04-16 16:35:12 +12:00
parent 01a235725e
commit abd4dd16c9
9 changed files with 193 additions and 85 deletions

View file

@ -27,7 +27,7 @@ import pytesseract
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \ is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \ shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
blocked_phrases, microblog_content_to_title blocked_phrases, microblog_content_to_title, generate_image_from_video_url
def public_key(): def public_key():
@ -738,78 +738,117 @@ def make_image_sizes(file_id, thumbnail_width=50, medium_width=120, directory='p
def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory): def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory):
file = File.query.get(file_id) file = File.query.get(file_id)
if file and file.source_url: if file and file.source_url:
try: # Videos
source_image_response = get_request(file.source_url) if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'):
except: new_filename = gibberish(15)
pass
# set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
# file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + '.jpg')
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
generate_image_from_video_url(file.source_url, final_place)
image = Image.open(final_place)
img_width = image.width
# Resize the image to medium
if medium_width:
if img_width > medium_width:
image.thumbnail((medium_width, medium_width))
image.save(final_place)
file.file_path = final_place
file.width = image.width
file.height = image.height
# Resize the image to a thumbnail (webp)
if thumbnail_width:
if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width
file.thumbnail_height = image.height
db.session.commit()
# Images
else: else:
if source_image_response.status_code == 200: try:
content_type = source_image_response.headers.get('content-type') source_image_response = get_request(file.source_url)
if content_type and content_type.startswith('image'): except:
source_image = source_image_response.content pass
source_image_response.close() else:
if source_image_response.status_code == 200:
content_type = source_image_response.headers.get('content-type')
if content_type and content_type.startswith('image'):
source_image = source_image_response.content
source_image_response.close()
file_ext = os.path.splitext(file.source_url)[1] file_ext = os.path.splitext(file.source_url)[1]
# fall back to parsing the http content type if the url does not contain a file extension # fall back to parsing the http content type if the url does not contain a file extension
if file_ext == '': if file_ext == '':
content_type_parts = content_type.split('/') content_type_parts = content_type.split('/')
if content_type_parts: if content_type_parts:
file_ext = '.' + content_type_parts[-1] file_ext = '.' + content_type_parts[-1]
else: else:
if '?' in file_ext: if '?' in file_ext:
file_ext = file_ext.split('?')[0] file_ext = file_ext.split('?')[0]
new_filename = gibberish(15) new_filename = gibberish(15)
# set up the storage directory # set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4] directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory) ensure_directory_exists(directory)
# file path and names to store the resized images on disk # file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + file_ext) final_place = os.path.join(directory, new_filename + file_ext)
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp') final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
# Load image data into Pillow # Load image data into Pillow
Image.MAX_IMAGE_PIXELS = 89478485 Image.MAX_IMAGE_PIXELS = 89478485
image = Image.open(BytesIO(source_image)) image = Image.open(BytesIO(source_image))
image = ImageOps.exif_transpose(image) image = ImageOps.exif_transpose(image)
img_width = image.width img_width = image.width
img_height = image.height img_height = image.height
# Resize the image to medium # Resize the image to medium
if medium_width: if medium_width:
if img_width > medium_width: if img_width > medium_width:
image.thumbnail((medium_width, medium_width)) image.thumbnail((medium_width, medium_width))
image.save(final_place) image.save(final_place)
file.file_path = final_place file.file_path = final_place
file.width = image.width file.width = image.width
file.height = image.height file.height = image.height
# Resize the image to a thumbnail (webp) # Resize the image to a thumbnail (webp)
if thumbnail_width: if thumbnail_width:
if img_width > thumbnail_width: if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width)) image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93) image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width file.thumbnail_width = image.width
file.thumbnail_height = image.height file.thumbnail_height = image.height
db.session.commit() db.session.commit()
# Alert regarding fascist meme content # Alert regarding fascist meme content
if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots. if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots.
try: try:
image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30) image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
except FileNotFoundError as e: except FileNotFoundError as e:
image_text = '' image_text = ''
if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345'
post = Post.query.filter_by(image_id=file.id).first() post = Post.query.filter_by(image_id=file.id).first()
notification = Notification(title='Review this', notification = Notification(title='Review this',
user_id=1, user_id=1,
author_id=post.user_id, author_id=post.user_id,
url=url_for('activitypub.post_ap', post_id=post.id)) url=url_for('activitypub.post_ap', post_id=post.id))
db.session.add(notification) db.session.add(notification)
db.session.commit() db.session.commit()
# create a summary from markdown if present, otherwise use html if available # create a summary from markdown if present, otherwise use html if available

View file

@ -112,7 +112,8 @@ def retrieve_mods_and_backfill(community_id: int):
post.ranking = post_ranking(post.score, post.posted_at) post.ranking = post_ranking(post.score, post.posted_at)
if post.url: if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url, other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url,
Post.posted_at > post.posted_at - timedelta(days=3), Post.posted_at < post.posted_at + timedelta(days=3)).all() Post.posted_at > post.posted_at - timedelta(days=3),
Post.posted_at < post.posted_at + timedelta(days=3)).all()
for op in other_posts: for op in other_posts:
if op.cross_posts is None: if op.cross_posts is None:
op.cross_posts = [post.id] op.cross_posts = [post.id]
@ -223,26 +224,31 @@ def save_post(form, post: Post, type: str):
remove_old_file(post.image_id) remove_old_file(post.image_id)
post.image_id = None post.image_id = None
unused, file_extension = os.path.splitext(form.link_url.data) if post.url.endswith('.mp4') or post.url.endswith('.webm'):
# this url is a link to an image - turn it into a image post file = File(source_url=form.link_url.data) # make_image_sizes() will take care of turning this into a still image
if file_extension.lower() in allowed_extensions:
file = File(source_url=form.link_url.data)
post.image = file post.image = file
db.session.add(file) db.session.add(file)
post.type = POST_TYPE_IMAGE
else: else:
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag unused, file_extension = os.path.splitext(form.link_url.data)
opengraph = opengraph_parse(form.link_url.data) # this url is a link to an image - turn it into a image post
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): if file_extension.lower() in allowed_extensions:
filename = opengraph.get('og:image') or opengraph.get('og:image:url') file = File(source_url=form.link_url.data)
filename_for_extension = filename.split('?')[0] if '?' in filename else filename post.image = file
unused, file_extension = os.path.splitext(filename_for_extension) db.session.add(file)
if file_extension.lower() in allowed_extensions and not filename.startswith('/'): post.type = POST_TYPE_IMAGE
file = url_to_thumbnail_file(filename) else:
if file: # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
file.alt_text = shorten_string(opengraph.get('og:title'), 295) opengraph = opengraph_parse(form.link_url.data)
post.image = file if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
db.session.add(file) filename = opengraph.get('og:image') or opengraph.get('og:image:url')
filename_for_extension = filename.split('?')[0] if '?' in filename else filename
unused, file_extension = os.path.splitext(filename_for_extension)
if file_extension.lower() in allowed_extensions and not filename.startswith('/'):
file = url_to_thumbnail_file(filename)
if file:
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
post.image = file
db.session.add(file)
elif type == 'image': elif type == 'image':
post.title = form.image_title.data post.title = form.image_title.data

View file

@ -25,7 +25,8 @@ from sqlalchemy_searchable import search
from app.utils import render_template, get_setting, gibberish, request_etag_matches, return_304, blocked_domains, \ from app.utils import render_template, get_setting, gibberish, request_etag_matches, return_304, blocked_domains, \
ap_datetime, ip_address, retrieve_block_list, shorten_string, markdown_to_text, user_filters_home, \ ap_datetime, ip_address, retrieve_block_list, shorten_string, markdown_to_text, user_filters_home, \
joined_communities, moderating_communities, parse_page, theme_list, get_request, markdown_to_html, allowlist_html, \ joined_communities, moderating_communities, parse_page, theme_list, get_request, markdown_to_html, allowlist_html, \
blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts, \
generate_image_from_video_url
from app.models import Community, CommunityMember, Post, Site, User, utcnow, Domain, Topic, File, Instance, \ from app.models import Community, CommunityMember, Post, Site, User, utcnow, Domain, Topic, File, Instance, \
InstanceRole, Notification InstanceRole, Notification
from PIL import Image from PIL import Image

View file

@ -1384,4 +1384,9 @@ h1 .warning_badge {
max-width: 100%; max-width: 100%;
} }
.responsive-video {
max-width: 100%;
max-height: 90vh;
}
/*# sourceMappingURL=structure.css.map */ /*# sourceMappingURL=structure.css.map */

View file

@ -1057,4 +1057,9 @@ h1 .warning_badge {
line-height: initial; line-height: initial;
max-width: 100%; max-width: 100%;
} }
}
.responsive-video {
max-width: 100%;
max-height: 90vh;
} }

View file

@ -83,6 +83,15 @@
<span class="fe fe-external"></span></a></p> <span class="fe fe-external"></span></a></p>
{% if post.url.endswith('.mp3') %} {% if post.url.endswith('.mp3') %}
<p><audio controls preload="{{ 'none' if low_bandwidth else 'metadata' }}" src="{{ post.url }}"></audio></p> <p><audio controls preload="{{ 'none' if low_bandwidth else 'metadata' }}" src="{{ post.url }}"></audio></p>
{% elif post.url.endswith('.mp4') or post.url.endswith('.webm') %}
<p>
    {# Inline player for direct video links. The MIME type goes in the `type` attribute of <source>; #}
    {# `media` is reserved for media queries and a MIME string there is not a valid query. #}
    <video class="responsive-video" controls preload="{{ 'metadata' if low_bandwidth else 'auto' }}">
    {% if post.url.endswith('.mp4') %}
        <source src="{{ post.url }}" type="video/mp4" />
    {% elif post.url.endswith('.webm') %}
        <source src="{{ post.url }}" type="video/webm" />
    {% endif %}
    </video></p>
{% endif %} {% endif %}
{% if 'youtube.com' in post.url %} {% if 'youtube.com' in post.url %}
<p><a href="https://piped.video/watch?v={{ post.youtube_embed() }}">{{ _('Watch on piped.video') }} <span class="fe fe-external"></span></a></p> <p><a href="https://piped.video/watch?v={{ post.youtube_embed() }}">{{ _('Watch on piped.video') }} <span class="fe fe-external"></span></a></p>

View file

@ -4,6 +4,7 @@ import bisect
import hashlib import hashlib
import mimetypes import mimetypes
import random import random
import tempfile
import urllib import urllib
from collections import defaultdict from collections import defaultdict
from datetime import datetime, timedelta, date from datetime import datetime, timedelta, date
@ -14,7 +15,7 @@ import math
from urllib.parse import urlparse, parse_qs, urlencode from urllib.parse import urlparse, parse_qs, urlencode
from functools import wraps from functools import wraps
import flask import flask
from bs4 import BeautifulSoup, NavigableString, MarkupResemblesLocatorWarning from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
import warnings import warnings
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
import requests import requests
@ -26,6 +27,8 @@ from wtforms.fields import SelectField, SelectMultipleField
from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
from app import db, cache from app import db, cache
import re import re
from moviepy.editor import VideoFileClip
from PIL import Image
from app.email import send_welcome_email from app.email import send_welcome_email
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \ from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
@ -881,6 +884,44 @@ def in_sorted_list(arr, target):
return index < len(arr) and arr[index] == target return index < len(arr) and arr[index] == target
# Makes a still image from a video url, without downloading the whole video file
def generate_image_from_video_url(video_url, output_path, length=2):
    """Save the first frame of a remote video as a still image.

    Only the first ``length`` megabytes (approximately) of the video are
    downloaded into a temporary file, which is then opened with moviepy to
    grab the frame at t=0.

    :param video_url: URL of the video; the response must declare a
        ``video/mp4`` or ``video/webm`` Content-Type.
    :param output_path: path the still image is written to (format is chosen
        by Pillow from the file extension).
    :param length: maximum amount of video data to download, in megabytes.
    :raises ValueError: if the Content-Type header is missing or is not one of
        the supported video types.
    """
    max_bytes = length * 1024 * 1024
    temp_file_path = None
    try:
        # Use the response as a context manager so the streamed connection is
        # released even though we stop reading before the body is exhausted.
        with requests.get(video_url, stream=True) as response:
            content_type = response.headers.get('Content-Type')
            if not content_type:
                raise ValueError("Content-Type not found in response headers")
            if 'video/mp4' in content_type:
                temp_file_extension = '.mp4'
            elif 'video/webm' in content_type:
                temp_file_extension = '.webm'
            else:
                raise ValueError("Unsupported video format")

            # delete=False: the file has to outlive this handle so moviepy can
            # open it by path; it is removed in the outer finally.
            with tempfile.NamedTemporaryFile(suffix=temp_file_extension, delete=False) as f:
                temp_file_path = f.name
                bytes_written = 0
                for chunk in response.iter_content(chunk_size=4096):
                    f.write(chunk)
                    # Count bytes ourselves instead of stat()-ing a file whose
                    # contents may still be sitting in the write buffer.
                    bytes_written += len(chunk)
                    if bytes_written >= max_bytes:
                        break

        # Generate thumbnail from the (possibly truncated) temporary file
        clip = VideoFileClip(temp_file_path)
        try:
            thumbnail = clip.get_frame(0)
        finally:
            clip.close()

        # Save the image
        Image.fromarray(thumbnail).save(output_path)
    finally:
        # Always clean up, including when the truncated video cannot be decoded.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
@cache.memoize(timeout=600) @cache.memoize(timeout=600)
def recently_upvoted_posts(user_id) -> List[int]: def recently_upvoted_posts(user_id) -> List[int]:
post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'), post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'),

View file

@ -34,9 +34,10 @@ time of things.
# Coding Standards / Guidelines # Coding Standards / Guidelines
**[PEP 8](https://peps.python.org/pep-0008/)** covers the basics. PyCharm encourages this by default - **[PEP 8](https://peps.python.org/pep-0008/)** covers the basics. PyCharm encourages this by default -
VS Code coders are encouraged to try the free community edition of PyCharm but it is by no means required. VS Code coders may like to try the free community edition of PyCharm but it is by no means required.
Use PEP 8 conventions for line length, naming, indentation. Use descriptive commit messages. Use PEP 8 conventions for naming, indentation. Use descriptive commit messages. Try to limit lines of code
to a length of roughly 120 characters.
Database model classes are singular. As in "Car", not "Cars". Database model classes are singular. As in "Car", not "Cars".

View file

@ -32,3 +32,4 @@ Werkzeug==2.3.3
pytesseract==0.3.10 pytesseract==0.3.10
sentry-sdk==1.40.6 sentry-sdk==1.40.6
python-slugify==8.0.4 python-slugify==8.0.4
moviepy==1.0.3