embed video and generate still thumbnail

This commit is contained in:
rimu 2024-04-16 16:35:12 +12:00
parent 01a235725e
commit abd4dd16c9
9 changed files with 193 additions and 85 deletions

View file

@ -27,7 +27,7 @@ import pytesseract
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
blocked_phrases, microblog_content_to_title
blocked_phrases, microblog_content_to_title, generate_image_from_video_url
def public_key():
@ -738,78 +738,117 @@ def make_image_sizes(file_id, thumbnail_width=50, medium_width=120, directory='p
def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory):
file = File.query.get(file_id)
if file and file.source_url:
try:
source_image_response = get_request(file.source_url)
except:
pass
# Videos
if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'):
new_filename = gibberish(15)
# set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
# file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + '.jpg')
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
generate_image_from_video_url(file.source_url, final_place)
image = Image.open(final_place)
img_width = image.width
# Resize the image to medium
if medium_width:
if img_width > medium_width:
image.thumbnail((medium_width, medium_width))
image.save(final_place)
file.file_path = final_place
file.width = image.width
file.height = image.height
# Resize the image to a thumbnail (webp)
if thumbnail_width:
if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width
file.thumbnail_height = image.height
db.session.commit()
# Images
else:
if source_image_response.status_code == 200:
content_type = source_image_response.headers.get('content-type')
if content_type and content_type.startswith('image'):
source_image = source_image_response.content
source_image_response.close()
try:
source_image_response = get_request(file.source_url)
except:
pass
else:
if source_image_response.status_code == 200:
content_type = source_image_response.headers.get('content-type')
if content_type and content_type.startswith('image'):
source_image = source_image_response.content
source_image_response.close()
file_ext = os.path.splitext(file.source_url)[1]
# fall back to parsing the http content type if the url does not contain a file extension
if file_ext == '':
content_type_parts = content_type.split('/')
if content_type_parts:
file_ext = '.' + content_type_parts[-1]
else:
if '?' in file_ext:
file_ext = file_ext.split('?')[0]
file_ext = os.path.splitext(file.source_url)[1]
# fall back to parsing the http content type if the url does not contain a file extension
if file_ext == '':
content_type_parts = content_type.split('/')
if content_type_parts:
file_ext = '.' + content_type_parts[-1]
else:
if '?' in file_ext:
file_ext = file_ext.split('?')[0]
new_filename = gibberish(15)
new_filename = gibberish(15)
# set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
# set up the storage directory
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
ensure_directory_exists(directory)
# file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + file_ext)
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
# file path and names to store the resized images on disk
final_place = os.path.join(directory, new_filename + file_ext)
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
# Load image data into Pillow
Image.MAX_IMAGE_PIXELS = 89478485
image = Image.open(BytesIO(source_image))
image = ImageOps.exif_transpose(image)
img_width = image.width
img_height = image.height
# Load image data into Pillow
Image.MAX_IMAGE_PIXELS = 89478485
image = Image.open(BytesIO(source_image))
image = ImageOps.exif_transpose(image)
img_width = image.width
img_height = image.height
# Resize the image to medium
if medium_width:
if img_width > medium_width:
image.thumbnail((medium_width, medium_width))
image.save(final_place)
file.file_path = final_place
file.width = image.width
file.height = image.height
# Resize the image to medium
if medium_width:
if img_width > medium_width:
image.thumbnail((medium_width, medium_width))
image.save(final_place)
file.file_path = final_place
file.width = image.width
file.height = image.height
# Resize the image to a thumbnail (webp)
if thumbnail_width:
if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width
file.thumbnail_height = image.height
# Resize the image to a thumbnail (webp)
if thumbnail_width:
if img_width > thumbnail_width:
image.thumbnail((thumbnail_width, thumbnail_width))
image.save(final_place_thumbnail, format="WebP", quality=93)
file.thumbnail_path = final_place_thumbnail
file.thumbnail_width = image.width
file.thumbnail_height = image.height
db.session.commit()
db.session.commit()
# Alert regarding fascist meme content
if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots.
try:
image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
except FileNotFoundError as e:
image_text = ''
if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345'
post = Post.query.filter_by(image_id=file.id).first()
notification = Notification(title='Review this',
user_id=1,
author_id=post.user_id,
url=url_for('activitypub.post_ap', post_id=post.id))
db.session.add(notification)
db.session.commit()
# Alert regarding fascist meme content
if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots.
try:
image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
except FileNotFoundError as e:
image_text = ''
if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345'
post = Post.query.filter_by(image_id=file.id).first()
notification = Notification(title='Review this',
user_id=1,
author_id=post.user_id,
url=url_for('activitypub.post_ap', post_id=post.id))
db.session.add(notification)
db.session.commit()
# create a summary from markdown if present, otherwise use html if available

View file

@ -112,7 +112,8 @@ def retrieve_mods_and_backfill(community_id: int):
post.ranking = post_ranking(post.score, post.posted_at)
if post.url:
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url,
Post.posted_at > post.posted_at - timedelta(days=3), Post.posted_at < post.posted_at + timedelta(days=3)).all()
Post.posted_at > post.posted_at - timedelta(days=3),
Post.posted_at < post.posted_at + timedelta(days=3)).all()
for op in other_posts:
if op.cross_posts is None:
op.cross_posts = [post.id]
@ -223,26 +224,31 @@ def save_post(form, post: Post, type: str):
remove_old_file(post.image_id)
post.image_id = None
unused, file_extension = os.path.splitext(form.link_url.data)
# this url is a link to an image - turn it into a image post
if file_extension.lower() in allowed_extensions:
file = File(source_url=form.link_url.data)
if post.url.endswith('.mp4') or post.url.endswith('.webm'):
file = File(source_url=form.link_url.data) # make_image_sizes() will take care of turning this into a still image
post.image = file
db.session.add(file)
post.type = POST_TYPE_IMAGE
else:
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
opengraph = opengraph_parse(form.link_url.data)
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
filename_for_extension = filename.split('?')[0] if '?' in filename else filename
unused, file_extension = os.path.splitext(filename_for_extension)
if file_extension.lower() in allowed_extensions and not filename.startswith('/'):
file = url_to_thumbnail_file(filename)
if file:
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
post.image = file
db.session.add(file)
unused, file_extension = os.path.splitext(form.link_url.data)
# this url is a link to an image - turn it into a image post
if file_extension.lower() in allowed_extensions:
file = File(source_url=form.link_url.data)
post.image = file
db.session.add(file)
post.type = POST_TYPE_IMAGE
else:
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
opengraph = opengraph_parse(form.link_url.data)
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
filename_for_extension = filename.split('?')[0] if '?' in filename else filename
unused, file_extension = os.path.splitext(filename_for_extension)
if file_extension.lower() in allowed_extensions and not filename.startswith('/'):
file = url_to_thumbnail_file(filename)
if file:
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
post.image = file
db.session.add(file)
elif type == 'image':
post.title = form.image_title.data

View file

@ -25,7 +25,8 @@ from sqlalchemy_searchable import search
from app.utils import render_template, get_setting, gibberish, request_etag_matches, return_304, blocked_domains, \
ap_datetime, ip_address, retrieve_block_list, shorten_string, markdown_to_text, user_filters_home, \
joined_communities, moderating_communities, parse_page, theme_list, get_request, markdown_to_html, allowlist_html, \
blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts
blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts, \
generate_image_from_video_url
from app.models import Community, CommunityMember, Post, Site, User, utcnow, Domain, Topic, File, Instance, \
InstanceRole, Notification
from PIL import Image

View file

@ -1384,4 +1384,9 @@ h1 .warning_badge {
max-width: 100%;
}
/* Caps embedded videos at the width of their container and 90% of the viewport height */
.responsive-video {
max-width: 100%;
max-height: 90vh;
}
/*# sourceMappingURL=structure.css.map */

View file

@ -1058,3 +1058,8 @@ h1 .warning_badge {
max-width: 100%;
}
}
/* Caps embedded videos at the width of their container and 90% of the viewport height */
.responsive-video {
max-width: 100%;
max-height: 90vh;
}

View file

@ -83,6 +83,15 @@
<span class="fe fe-external"></span></a></p>
{% if post.url.endswith('.mp3') %}
<p><audio controls preload="{{ 'none' if low_bandwidth else 'metadata' }}" src="{{ post.url }}"></audio></p>
{% elif post.url.endswith('.mp4') or post.url.endswith('.webm') %}
<p>
<video class="responsive-video" controls preload="{{ 'metadata' if low_bandwidth else 'auto' }}">
{# the MIME type goes in `type`; `media` is for media queries #}
{% if post.url.endswith('.mp4') %}
<source src="{{ post.url }}" type="video/mp4" />
{% elif post.url.endswith('.webm') %}
<source src="{{ post.url }}" type="video/webm" />
{% endif %}
</video></p>
{% endif %}
{% if 'youtube.com' in post.url %}
<p><a href="https://piped.video/watch?v={{ post.youtube_embed() }}">{{ _('Watch on piped.video') }} <span class="fe fe-external"></span></a></p>

View file

@ -4,6 +4,7 @@ import bisect
import hashlib
import mimetypes
import random
import tempfile
import urllib
from collections import defaultdict
from datetime import datetime, timedelta, date
@ -14,7 +15,7 @@ import math
from urllib.parse import urlparse, parse_qs, urlencode
from functools import wraps
import flask
from bs4 import BeautifulSoup, NavigableString, MarkupResemblesLocatorWarning
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
import warnings
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
import requests
@ -26,6 +27,8 @@ from wtforms.fields import SelectField, SelectMultipleField
from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
from app import db, cache
import re
from moviepy.editor import VideoFileClip
from PIL import Image
from app.email import send_welcome_email
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
@ -881,6 +884,44 @@ def in_sorted_list(arr, target):
return index < len(arr) and arr[index] == target
def generate_image_from_video_url(video_url, output_path, length=2):
    """Make a still image from a video url, without downloading the whole video file.

    Streams at most roughly ``length`` megabytes of the video into a temporary
    file, grabs the frame at t=0 with moviepy and saves it to ``output_path``.
    The temporary file is always removed, even when decoding fails.

    :param video_url: URL of the video to fetch.
    :param output_path: where to save the still image; the extension selects
        the image format used by Pillow.
    :param length: approximate maximum number of megabytes to download.
    :raises ValueError: if the response has no Content-Type header or the
        Content-Type is neither video/mp4 nor video/webm.
    """
    max_bytes = length * 1024 * 1024
    temp_file_path = None
    try:
        # The context manager releases the connection even though the body is
        # deliberately not read to the end. The timeout stops a stalled server
        # from hanging the worker indefinitely.
        with requests.get(video_url, stream=True, timeout=10) as response:
            content_type = response.headers.get('Content-Type')
            if not content_type:
                raise ValueError("Content-Type not found in response headers")
            if 'video/mp4' in content_type:
                temp_file_extension = '.mp4'
            elif 'video/webm' in content_type:
                temp_file_extension = '.webm'
            else:
                raise ValueError("Unsupported video format")

            # Write the downloaded data to a temporary file. delete=False because
            # the file is re-opened by path (by moviepy) after this handle closes.
            with tempfile.NamedTemporaryFile(suffix=temp_file_extension, delete=False) as f:
                temp_file_path = f.name
                bytes_written = 0
                for chunk in response.iter_content(chunk_size=4096):
                    f.write(chunk)
                    # Count bytes ourselves: the on-disk size lags behind while
                    # writes are still sitting in the file buffer.
                    bytes_written += len(chunk)
                    if bytes_written >= max_bytes:
                        break

        # Generate thumbnail from the temporary file
        clip = VideoFileClip(temp_file_path)
        try:
            thumbnail = clip.get_frame(0)
        finally:
            clip.close()

        # Save the image
        Image.fromarray(thumbnail).save(output_path)
    finally:
        # A truncated download may not be decodable; never leave the partial file behind.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
@cache.memoize(timeout=600)
def recently_upvoted_posts(user_id) -> List[int]:
post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'),

View file

@ -34,9 +34,10 @@ time of things.
# Coding Standards / Guidelines
**[PEP 8](https://peps.python.org/pep-0008/)** covers the basics. PyCharm encourages this by default -
VS Code coders are encouraged to try the free community edition of PyCharm but it is by no means required.
VS Code coders may like to try the free community edition of PyCharm but it is by no means required.
Use PEP 8 conventions for line length, naming, indentation. Use descriptive commit messages.
Use PEP 8 conventions for naming and indentation. Use descriptive commit messages. Try to limit lines of code
to a length of roughly 120 characters.
Database model classes are singular. As in "Car", not "Cars".

View file

@ -32,3 +32,4 @@ Werkzeug==2.3.3
pytesseract==0.3.10
sentry-sdk==1.40.6
python-slugify==8.0.4
moviepy==1.0.3