mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-01-23 11:26:56 -08:00
embed video and generate still thumbnail
This commit is contained in:
parent
01a235725e
commit
abd4dd16c9
9 changed files with 193 additions and 85 deletions
|
@ -27,7 +27,7 @@ import pytesseract
|
|||
from app.utils import get_request, allowlist_html, get_setting, ap_datetime, markdown_to_html, \
|
||||
is_image_url, domain_from_url, gibberish, ensure_directory_exists, markdown_to_text, head_request, post_ranking, \
|
||||
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
|
||||
blocked_phrases, microblog_content_to_title
|
||||
blocked_phrases, microblog_content_to_title, generate_image_from_video_url
|
||||
|
||||
|
||||
def public_key():
|
||||
|
@ -738,78 +738,117 @@ def make_image_sizes(file_id, thumbnail_width=50, medium_width=120, directory='p
|
|||
def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory):
|
||||
file = File.query.get(file_id)
|
||||
if file and file.source_url:
|
||||
try:
|
||||
source_image_response = get_request(file.source_url)
|
||||
except:
|
||||
pass
|
||||
# Videos
|
||||
if file.source_url.endswith('.mp4') or file.source_url.endswith('.webm'):
|
||||
new_filename = gibberish(15)
|
||||
|
||||
# set up the storage directory
|
||||
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
|
||||
ensure_directory_exists(directory)
|
||||
|
||||
# file path and names to store the resized images on disk
|
||||
final_place = os.path.join(directory, new_filename + '.jpg')
|
||||
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
|
||||
|
||||
generate_image_from_video_url(file.source_url, final_place)
|
||||
|
||||
image = Image.open(final_place)
|
||||
img_width = image.width
|
||||
|
||||
# Resize the image to medium
|
||||
if medium_width:
|
||||
if img_width > medium_width:
|
||||
image.thumbnail((medium_width, medium_width))
|
||||
image.save(final_place)
|
||||
file.file_path = final_place
|
||||
file.width = image.width
|
||||
file.height = image.height
|
||||
|
||||
# Resize the image to a thumbnail (webp)
|
||||
if thumbnail_width:
|
||||
if img_width > thumbnail_width:
|
||||
image.thumbnail((thumbnail_width, thumbnail_width))
|
||||
image.save(final_place_thumbnail, format="WebP", quality=93)
|
||||
file.thumbnail_path = final_place_thumbnail
|
||||
file.thumbnail_width = image.width
|
||||
file.thumbnail_height = image.height
|
||||
|
||||
db.session.commit()
|
||||
|
||||
# Images
|
||||
else:
|
||||
if source_image_response.status_code == 200:
|
||||
content_type = source_image_response.headers.get('content-type')
|
||||
if content_type and content_type.startswith('image'):
|
||||
source_image = source_image_response.content
|
||||
source_image_response.close()
|
||||
try:
|
||||
source_image_response = get_request(file.source_url)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
if source_image_response.status_code == 200:
|
||||
content_type = source_image_response.headers.get('content-type')
|
||||
if content_type and content_type.startswith('image'):
|
||||
source_image = source_image_response.content
|
||||
source_image_response.close()
|
||||
|
||||
file_ext = os.path.splitext(file.source_url)[1]
|
||||
# fall back to parsing the http content type if the url does not contain a file extension
|
||||
if file_ext == '':
|
||||
content_type_parts = content_type.split('/')
|
||||
if content_type_parts:
|
||||
file_ext = '.' + content_type_parts[-1]
|
||||
else:
|
||||
if '?' in file_ext:
|
||||
file_ext = file_ext.split('?')[0]
|
||||
file_ext = os.path.splitext(file.source_url)[1]
|
||||
# fall back to parsing the http content type if the url does not contain a file extension
|
||||
if file_ext == '':
|
||||
content_type_parts = content_type.split('/')
|
||||
if content_type_parts:
|
||||
file_ext = '.' + content_type_parts[-1]
|
||||
else:
|
||||
if '?' in file_ext:
|
||||
file_ext = file_ext.split('?')[0]
|
||||
|
||||
new_filename = gibberish(15)
|
||||
new_filename = gibberish(15)
|
||||
|
||||
# set up the storage directory
|
||||
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
|
||||
ensure_directory_exists(directory)
|
||||
# set up the storage directory
|
||||
directory = f'app/static/media/{directory}/' + new_filename[0:2] + '/' + new_filename[2:4]
|
||||
ensure_directory_exists(directory)
|
||||
|
||||
# file path and names to store the resized images on disk
|
||||
final_place = os.path.join(directory, new_filename + file_ext)
|
||||
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
|
||||
# file path and names to store the resized images on disk
|
||||
final_place = os.path.join(directory, new_filename + file_ext)
|
||||
final_place_thumbnail = os.path.join(directory, new_filename + '_thumbnail.webp')
|
||||
|
||||
# Load image data into Pillow
|
||||
Image.MAX_IMAGE_PIXELS = 89478485
|
||||
image = Image.open(BytesIO(source_image))
|
||||
image = ImageOps.exif_transpose(image)
|
||||
img_width = image.width
|
||||
img_height = image.height
|
||||
# Load image data into Pillow
|
||||
Image.MAX_IMAGE_PIXELS = 89478485
|
||||
image = Image.open(BytesIO(source_image))
|
||||
image = ImageOps.exif_transpose(image)
|
||||
img_width = image.width
|
||||
img_height = image.height
|
||||
|
||||
# Resize the image to medium
|
||||
if medium_width:
|
||||
if img_width > medium_width:
|
||||
image.thumbnail((medium_width, medium_width))
|
||||
image.save(final_place)
|
||||
file.file_path = final_place
|
||||
file.width = image.width
|
||||
file.height = image.height
|
||||
# Resize the image to medium
|
||||
if medium_width:
|
||||
if img_width > medium_width:
|
||||
image.thumbnail((medium_width, medium_width))
|
||||
image.save(final_place)
|
||||
file.file_path = final_place
|
||||
file.width = image.width
|
||||
file.height = image.height
|
||||
|
||||
# Resize the image to a thumbnail (webp)
|
||||
if thumbnail_width:
|
||||
if img_width > thumbnail_width:
|
||||
image.thumbnail((thumbnail_width, thumbnail_width))
|
||||
image.save(final_place_thumbnail, format="WebP", quality=93)
|
||||
file.thumbnail_path = final_place_thumbnail
|
||||
file.thumbnail_width = image.width
|
||||
file.thumbnail_height = image.height
|
||||
# Resize the image to a thumbnail (webp)
|
||||
if thumbnail_width:
|
||||
if img_width > thumbnail_width:
|
||||
image.thumbnail((thumbnail_width, thumbnail_width))
|
||||
image.save(final_place_thumbnail, format="WebP", quality=93)
|
||||
file.thumbnail_path = final_place_thumbnail
|
||||
file.thumbnail_width = image.width
|
||||
file.thumbnail_height = image.height
|
||||
|
||||
db.session.commit()
|
||||
db.session.commit()
|
||||
|
||||
# Alert regarding fascist meme content
|
||||
if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots.
|
||||
try:
|
||||
image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
|
||||
except FileNotFoundError as e:
|
||||
image_text = ''
|
||||
if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345'
|
||||
post = Post.query.filter_by(image_id=file.id).first()
|
||||
notification = Notification(title='Review this',
|
||||
user_id=1,
|
||||
author_id=post.user_id,
|
||||
url=url_for('activitypub.post_ap', post_id=post.id))
|
||||
db.session.add(notification)
|
||||
db.session.commit()
|
||||
# Alert regarding fascist meme content
|
||||
if img_width < 2000: # images > 2000px tend to be real photos instead of 4chan screenshots.
|
||||
try:
|
||||
image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L'), timeout=30)
|
||||
except FileNotFoundError as e:
|
||||
image_text = ''
|
||||
if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345'
|
||||
post = Post.query.filter_by(image_id=file.id).first()
|
||||
notification = Notification(title='Review this',
|
||||
user_id=1,
|
||||
author_id=post.user_id,
|
||||
url=url_for('activitypub.post_ap', post_id=post.id))
|
||||
db.session.add(notification)
|
||||
db.session.commit()
|
||||
|
||||
|
||||
# create a summary from markdown if present, otherwise use html if available
|
||||
|
|
|
@ -112,7 +112,8 @@ def retrieve_mods_and_backfill(community_id: int):
|
|||
post.ranking = post_ranking(post.score, post.posted_at)
|
||||
if post.url:
|
||||
other_posts = Post.query.filter(Post.id != post.id, Post.url == post.url,
|
||||
Post.posted_at > post.posted_at - timedelta(days=3), Post.posted_at < post.posted_at + timedelta(days=3)).all()
|
||||
Post.posted_at > post.posted_at - timedelta(days=3),
|
||||
Post.posted_at < post.posted_at + timedelta(days=3)).all()
|
||||
for op in other_posts:
|
||||
if op.cross_posts is None:
|
||||
op.cross_posts = [post.id]
|
||||
|
@ -223,26 +224,31 @@ def save_post(form, post: Post, type: str):
|
|||
remove_old_file(post.image_id)
|
||||
post.image_id = None
|
||||
|
||||
unused, file_extension = os.path.splitext(form.link_url.data)
|
||||
# this url is a link to an image - turn it into a image post
|
||||
if file_extension.lower() in allowed_extensions:
|
||||
file = File(source_url=form.link_url.data)
|
||||
if post.url.endswith('.mp4') or post.url.endswith('.webm'):
|
||||
file = File(source_url=form.link_url.data) # make_image_sizes() will take care of turning this into a still image
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
post.type = POST_TYPE_IMAGE
|
||||
else:
|
||||
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
|
||||
opengraph = opengraph_parse(form.link_url.data)
|
||||
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
|
||||
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
|
||||
filename_for_extension = filename.split('?')[0] if '?' in filename else filename
|
||||
unused, file_extension = os.path.splitext(filename_for_extension)
|
||||
if file_extension.lower() in allowed_extensions and not filename.startswith('/'):
|
||||
file = url_to_thumbnail_file(filename)
|
||||
if file:
|
||||
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
unused, file_extension = os.path.splitext(form.link_url.data)
|
||||
# this url is a link to an image - turn it into a image post
|
||||
if file_extension.lower() in allowed_extensions:
|
||||
file = File(source_url=form.link_url.data)
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
post.type = POST_TYPE_IMAGE
|
||||
else:
|
||||
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
|
||||
opengraph = opengraph_parse(form.link_url.data)
|
||||
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
|
||||
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
|
||||
filename_for_extension = filename.split('?')[0] if '?' in filename else filename
|
||||
unused, file_extension = os.path.splitext(filename_for_extension)
|
||||
if file_extension.lower() in allowed_extensions and not filename.startswith('/'):
|
||||
file = url_to_thumbnail_file(filename)
|
||||
if file:
|
||||
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
|
||||
elif type == 'image':
|
||||
post.title = form.image_title.data
|
||||
|
|
|
@ -25,7 +25,8 @@ from sqlalchemy_searchable import search
|
|||
from app.utils import render_template, get_setting, gibberish, request_etag_matches, return_304, blocked_domains, \
|
||||
ap_datetime, ip_address, retrieve_block_list, shorten_string, markdown_to_text, user_filters_home, \
|
||||
joined_communities, moderating_communities, parse_page, theme_list, get_request, markdown_to_html, allowlist_html, \
|
||||
blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts
|
||||
blocked_instances, communities_banned_from, topic_tree, recently_upvoted_posts, recently_downvoted_posts, \
|
||||
generate_image_from_video_url
|
||||
from app.models import Community, CommunityMember, Post, Site, User, utcnow, Domain, Topic, File, Instance, \
|
||||
InstanceRole, Notification
|
||||
from PIL import Image
|
||||
|
|
|
@ -1384,4 +1384,9 @@ h1 .warning_badge {
|
|||
max-width: 100%;
|
||||
}
|
||||
|
||||
.responsive-video {
|
||||
max-width: 100%;
|
||||
max-height: 90vh;
|
||||
}
|
||||
|
||||
/*# sourceMappingURL=structure.css.map */
|
||||
|
|
|
@ -1057,4 +1057,9 @@ h1 .warning_badge {
|
|||
line-height: initial;
|
||||
max-width: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
.responsive-video {
|
||||
max-width: 100%;
|
||||
max-height: 90vh;
|
||||
}
|
|
@ -83,6 +83,15 @@
|
|||
<span class="fe fe-external"></span></a></p>
|
||||
{% if post.url.endswith('.mp3') %}
|
||||
<p><audio controls preload="{{ 'none' if low_bandwidth else 'metadata' }}" src="{{ post.url }}"></audio></p>
|
||||
{% elif post.url.endswith('.mp4') or post.url.endswith('.webm') %}
|
||||
<p>
|
||||
<video class="responsive-video" controls preload="{{ 'metadata' if low_bandwidth else 'auto' }}">
|
||||
{% if post.url.endswith('.mp4') %}
|
||||
<source src="{{ post.url }}" type="video/mp4" />
|
||||
{% elif post.url.endswith('.webm') %}
|
||||
<source src="{{ post.url }}" type="video/webm" />
|
||||
{% endif %}
|
||||
</video></p>
|
||||
{% endif %}
|
||||
{% if 'youtube.com' in post.url %}
|
||||
<p><a href="https://piped.video/watch?v={{ post.youtube_embed() }}">{{ _('Watch on piped.video') }} <span class="fe fe-external"></span></a></p>
|
||||
|
|
43
app/utils.py
43
app/utils.py
|
@ -4,6 +4,7 @@ import bisect
|
|||
import hashlib
|
||||
import mimetypes
|
||||
import random
|
||||
import tempfile
|
||||
import urllib
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta, date
|
||||
|
@ -14,7 +15,7 @@ import math
|
|||
from urllib.parse import urlparse, parse_qs, urlencode
|
||||
from functools import wraps
|
||||
import flask
|
||||
from bs4 import BeautifulSoup, NavigableString, MarkupResemblesLocatorWarning
|
||||
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
|
||||
import requests
|
||||
|
@ -26,6 +27,8 @@ from wtforms.fields import SelectField, SelectMultipleField
|
|||
from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
|
||||
from app import db, cache
|
||||
import re
|
||||
from moviepy.editor import VideoFileClip
|
||||
from PIL import Image
|
||||
|
||||
from app.email import send_welcome_email
|
||||
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
|
||||
|
@ -881,6 +884,44 @@ def in_sorted_list(arr, target):
|
|||
return index < len(arr) and arr[index] == target
|
||||
|
||||
|
||||
# Makes a still image from a video url, without downloading the whole video file
|
||||
def generate_image_from_video_url(video_url, output_path, length=2):
    """Save the first frame of a remote video as a still image.

    Only the beginning of the video is downloaded (roughly ``length`` megabytes),
    written to a temporary file, opened with moviepy to grab the frame at t=0,
    and the frame is then saved to ``output_path`` with Pillow. The temporary
    file and the HTTP connection are always released, even when something fails.

    :param video_url: URL of the video to fetch.
    :param output_path: path the still image is saved to.
    :param length: maximum amount of video data to download, in megabytes.
    :raises ValueError: if the response has no Content-Type header, or the
        Content-Type is neither video/mp4 nor video/webm.
    """
    max_bytes = length * 1024 * 1024
    temp_file_path = None

    try:
        # The timeout applies to connecting and to each read, so a stalled server
        # cannot block the caller indefinitely.
        response = requests.get(video_url, stream=True, timeout=30)
        try:
            content_type = response.headers.get('Content-Type')
            if not content_type:
                raise ValueError("Content-Type not found in response headers")
            if 'video/mp4' in content_type:
                temp_file_extension = '.mp4'
            elif 'video/webm' in content_type:
                temp_file_extension = '.webm'
            else:
                raise ValueError("Unsupported video format")

            # delete=False because the file is re-opened by path after this
            # handle is closed; it is removed in the outer finally instead.
            with tempfile.NamedTemporaryFile(suffix=temp_file_extension, delete=False) as f:
                temp_file_path = f.name
                bytes_written = 0
                for chunk in response.iter_content(chunk_size=4096):
                    f.write(chunk)
                    # Count bytes directly: the size on disk lags behind while
                    # the file object is still buffering writes.
                    bytes_written += len(chunk)
                    if bytes_written >= max_bytes:
                        break
        finally:
            # With stream=True the connection stays open until it is closed
            # explicitly, which matters most when we stop reading early.
            response.close()

        # Generate thumbnail from the temporary file
        clip = VideoFileClip(temp_file_path)
        try:
            thumbnail = clip.get_frame(0)
        finally:
            clip.close()

        # Save the image
        thumbnail_image = Image.fromarray(thumbnail)
        thumbnail_image.save(output_path)
    finally:
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
|
||||
|
||||
|
||||
@cache.memoize(timeout=600)
|
||||
def recently_upvoted_posts(user_id) -> List[int]:
|
||||
post_ids = db.session.execute(text('SELECT post_id FROM "post_vote" WHERE user_id = :user_id AND effect > 0 ORDER BY id DESC LIMIT 1000'),
|
||||
|
|
|
@ -34,9 +34,10 @@ time of things.
|
|||
# Coding Standards / Guidelines
|
||||
|
||||
**[PEP 8](https://peps.python.org/pep-0008/)** covers the basics. PyCharm encourages this by default -
|
||||
VS Code coders are encouraged to try the free community edition of PyCharm but it is by no means required.
|
||||
VS Code coders may like to try the free community edition of PyCharm but it is by no means required.
|
||||
|
||||
Use PEP 8 conventions for line length, naming, indentation. Use descriptive commit messages.
|
||||
Use PEP 8 conventions for naming, indentation. Use descriptive commit messages. Try to limit lines of code
|
||||
to a length of roughly 120 characters.
|
||||
|
||||
Database model classes are singular. As in "Car", not "Cars".
|
||||
|
||||
|
|
|
@ -32,3 +32,4 @@ Werkzeug==2.3.3
|
|||
pytesseract==0.3.10
|
||||
sentry-sdk==1.40.6
|
||||
python-slugify==8.0.4
|
||||
moviepy==1.0.3
|
||||
|
|
Loading…
Reference in a new issue