mirror of
https://codeberg.org/rimu/pyfedi
synced 2025-02-02 16:21:32 -08:00
improve thumbnail generation reliability
This commit is contained in:
parent
805fd7c5d4
commit
115247f422
4 changed files with 80 additions and 62 deletions
|
@ -31,7 +31,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar
|
|||
shorten_string, reply_already_exists, reply_is_just_link_to_gif_reaction, confidence, remove_tracking_from_link, \
|
||||
blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \
|
||||
notification_subscribers, communities_banned_from, lemmy_markdown_to_html, actor_contains_blocked_words, \
|
||||
html_to_text
|
||||
html_to_text, opengraph_parse, url_to_thumbnail_file
|
||||
|
||||
|
||||
def public_key():
|
||||
|
@ -1734,6 +1734,16 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
|
|||
image = File(source_url=request_json['object']['image']['url'])
|
||||
db.session.add(image)
|
||||
post.image = image
|
||||
if post.image is None and post.type == POST_TYPE_LINK: # This is a link post but the source instance has not provided a thumbnail image
|
||||
# Let's see if we can do better than the source instance did!
|
||||
opengraph = opengraph_parse(post.url)
|
||||
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
|
||||
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
|
||||
if not filename.startswith('/'):
|
||||
file = File(source_url=filename, alt_text=shorten_string(opengraph.get('og:title'), 295))
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
|
||||
db.session.add(post)
|
||||
post.ranking = post_ranking(post.score, post.posted_at)
|
||||
community.post_count += 1
|
||||
|
|
|
@ -17,7 +17,7 @@ from app.community.forms import SearchRemoteCommunity, CreateDiscussionForm, Cre
|
|||
DeleteCommunityForm, AddCommunityForm, EditCommunityForm, AddModeratorForm, BanUserCommunityForm, \
|
||||
EscalateReportForm, ResolveReportForm, CreateVideoForm, CreatePollForm, RetrieveRemotePost
|
||||
from app.community.util import search_for_community, actor_to_community, \
|
||||
opengraph_parse, url_to_thumbnail_file, save_post, save_icon_file, save_banner_file, send_to_remote_instance, \
|
||||
save_post, save_icon_file, save_banner_file, send_to_remote_instance, \
|
||||
delete_post_from_community, delete_post_reply_from_community, community_in_list
|
||||
from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, POST_TYPE_LINK, POST_TYPE_ARTICLE, POST_TYPE_IMAGE, \
|
||||
SUBSCRIPTION_PENDING, SUBSCRIPTION_MODERATOR, REPORT_STATE_NEW, REPORT_STATE_ESCALATED, REPORT_STATE_RESOLVED, \
|
||||
|
|
|
@ -19,7 +19,7 @@ from app.models import Community, File, BannedInstances, PostReply, PostVote, Po
|
|||
Instance, Notification, User, ActivityPubLog, NotificationSubscription, Language, Tag, PollChoice, Poll
|
||||
from app.utils import get_request, gibberish, markdown_to_html, domain_from_url, allowlist_html, \
|
||||
is_image_url, ensure_directory_exists, inbox_domain, post_ranking, shorten_string, parse_page, \
|
||||
remove_tracking_from_link, ap_datetime, instance_banned, blocked_phrases
|
||||
remove_tracking_from_link, ap_datetime, instance_banned, blocked_phrases, url_to_thumbnail_file, opengraph_parse
|
||||
from sqlalchemy import func, desc, text
|
||||
import os
|
||||
|
||||
|
@ -242,52 +242,6 @@ def actor_to_community(actor) -> Community:
|
|||
return community
|
||||
|
||||
|
||||
def opengraph_parse(url):
    """Run ``parse_page`` on ``url`` with any query string removed.

    Everything from the first ``?`` onwards is dropped before the lookup.
    Returns whatever ``parse_page`` returns, or ``None`` if it raises.
    """
    page_url = url.split('?', 1)[0] if '?' in url else url
    try:
        return parse_page(page_url)
    except Exception:
        # Best effort: any failure while fetching/parsing simply means "no data"
        return None
|
||||
|
||||
|
||||
def url_to_thumbnail_file(filename) -> File:
    """Download the image at ``filename`` and store a thumbnail of it on disk.

    The image is fetched over HTTP, written beneath ``app/static/media/posts/``
    under a random name and then shrunk in place to fit within 150x150 pixels.

    :param filename: URL of the remote image (despite the name, not a local path).
    :return: a new ``File`` (not added to the database session here) describing
        the thumbnail, or ``None`` when the URL is empty, the request fails,
        the response is not a 200 with an ``image...`` content type, or the
        payload cannot be turned into a thumbnail.
    """
    if not filename:
        return None

    try:
        response = requests.get(filename, timeout=5)
    except Exception:
        # Thumbnails are best effort: an unreachable or misbehaving remote host
        # must not break the caller.
        return None

    try:
        if response.status_code != 200:
            return None
        content_type = response.headers.get('content-type')
        if not content_type or not content_type.startswith('image'):
            return None
        image_bytes = response.content
    finally:
        # Release the connection on every path, not only on success
        response.close()

    # Generate file extension from mime type. Parameters ("; charset=binary") and
    # structured suffixes ("+xml") must not end up in the file name.
    mime_type = content_type.split(';')[0].strip().lower()
    subtype = mime_type.partition('/')[2]
    if subtype:
        file_extension = '.' + subtype.split('+')[0]
        if file_extension == '.jpeg':
            file_extension = '.jpg'
    else:
        # No usable subtype: fall back to the extension in the URL path
        url_path = filename.replace('%3f', '?').replace('%3F', '?')  # sometimes urls are not decoded properly
        file_extension = os.path.splitext(url_path.split('?')[0])[1]

    new_filename = gibberish(15)
    directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(directory)
    final_place = os.path.join(directory, new_filename + file_extension)
    with open(final_place, 'wb') as f:
        f.write(image_bytes)

    Image.MAX_IMAGE_PIXELS = 89478485
    try:
        with Image.open(final_place) as img:
            thumbnail = ImageOps.exif_transpose(img)
            thumbnail.thumbnail((150, 150))
            thumbnail.save(final_place)
            thumbnail_width = thumbnail.width
            thumbnail_height = thumbnail.height
    except (OSError, ValueError, Image.DecompressionBombError):
        # Not a decodable image, an unsupported extension or an oversized image:
        # remove the downloaded bytes instead of leaving an orphan file behind.
        try:
            os.remove(final_place)
        except OSError:
            pass  # nothing more can be done if the file cannot be removed
        return None

    return File(file_name=new_filename + file_extension, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place,
                source_url=filename)
|
||||
|
||||
|
||||
def save_post(form, post: Post, type: str):
|
||||
post.indexable = current_user.indexable
|
||||
post.sticky = form.sticky.data
|
||||
|
@ -318,6 +272,7 @@ def save_post(form, post: Post, type: str):
|
|||
post.image_id = None
|
||||
|
||||
if post.url.endswith('.mp4') or post.url.endswith('.webm'):
|
||||
post.type = POST_TYPE_VIDEO
|
||||
file = File(source_url=form.link_url.data) # make_image_sizes() will take care of turning this into a still image
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
|
@ -331,15 +286,16 @@ def save_post(form, post: Post, type: str):
|
|||
post.type = POST_TYPE_IMAGE
|
||||
else:
|
||||
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
|
||||
opengraph = opengraph_parse(form.link_url.data)
|
||||
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
|
||||
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
|
||||
if not filename.startswith('/'):
|
||||
file = url_to_thumbnail_file(filename)
|
||||
if file:
|
||||
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
if not post.type == POST_TYPE_VIDEO:
|
||||
opengraph = opengraph_parse(form.link_url.data)
|
||||
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
|
||||
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
|
||||
if not filename.startswith('/'):
|
||||
file = url_to_thumbnail_file(filename)
|
||||
if file:
|
||||
file.alt_text = shorten_string(opengraph.get('og:title'), 295)
|
||||
post.image = file
|
||||
db.session.add(file)
|
||||
|
||||
elif type == 'image':
|
||||
post.title = form.image_title.data
|
||||
|
|
60
app/utils.py
60
app/utils.py
|
@ -31,11 +31,12 @@ from wtforms.widgets import Select, html_params, ListWidget, CheckboxInput
|
|||
from app import db, cache
|
||||
import re
|
||||
from moviepy.editor import VideoFileClip
|
||||
from PIL import Image
|
||||
from PIL import Image, ImageOps
|
||||
|
||||
from app.email import send_welcome_email
|
||||
from app.models import Settings, Domain, Instance, BannedInstances, User, Community, DomainBlock, ActivityPubLog, IpBan, \
|
||||
Site, Post, PostReply, utcnow, Filter, CommunityMember, InstanceBlock, CommunityBan, Topic, UserBlock, Language
|
||||
Site, Post, PostReply, utcnow, Filter, CommunityMember, InstanceBlock, CommunityBan, Topic, UserBlock, Language, \
|
||||
File
|
||||
|
||||
|
||||
# Flask's render_template function, with support for themes added
|
||||
|
@ -89,7 +90,8 @@ def get_request(uri, params=None, headers=None) -> requests.Response:
|
|||
else:
|
||||
payload_str = urllib.parse.urlencode(params) if params else None
|
||||
try:
|
||||
response = requests.get(uri, params=payload_str, headers=headers, timeout=5, allow_redirects=True)
|
||||
timeout = 15 if 'washingtonpost.com' in uri else 5 # Washington Post is really slow on og:image for some reason
|
||||
response = requests.get(uri, params=payload_str, headers=headers, timeout=timeout, allow_redirects=True)
|
||||
except requests.exceptions.SSLError as invalid_cert:
|
||||
# Not our problem if the other end doesn't have proper SSL
|
||||
current_app.logger.info(f"{uri} {invalid_cert}")
|
||||
|
@ -851,6 +853,56 @@ def confidence(ups, downs) -> float:
|
|||
return _confidence(ups, downs)
|
||||
|
||||
|
||||
def opengraph_parse(url):
    """Run ``parse_page`` on ``url`` with any query string removed.

    Everything from the first ``?`` onwards is dropped before the lookup.
    Returns whatever ``parse_page`` returns, or ``None`` if it raises.
    """
    page_url = url.split('?', 1)[0] if '?' in url else url
    try:
        return parse_page(page_url)
    except Exception:
        # Best effort: any failure while fetching/parsing simply means "no data"
        return None
|
||||
|
||||
|
||||
def url_to_thumbnail_file(filename) -> File:
    """Download the image at ``filename`` and store a thumbnail of it on disk.

    The image is fetched over HTTP, written beneath ``app/static/media/posts/``
    under a random name and then shrunk in place to fit within 150x150 pixels.

    :param filename: URL of the remote image (despite the name, not a local path).
    :return: a new ``File`` (not added to the database session here) describing
        the thumbnail, or ``None`` when the URL is empty, the request fails,
        the response is not a 200 with an ``image...`` content type, or the
        payload cannot be turned into a thumbnail.
    """
    if not filename:
        return None

    try:
        timeout = 15 if 'washingtonpost.com' in filename else 5     # Washington Post is really slow for some reason
        response = requests.get(filename, timeout=timeout)
    except Exception:
        # Thumbnails are best effort: an unreachable or misbehaving remote host
        # must not break the caller. Unlike a bare ``except:`` this still lets
        # KeyboardInterrupt and SystemExit through.
        return None

    try:
        if response.status_code != 200:
            return None
        content_type = response.headers.get('content-type')
        if not content_type or not content_type.startswith('image'):
            return None
        image_bytes = response.content
    finally:
        # Release the connection on every path, not only on success
        response.close()

    # Generate file extension from mime type. Parameters ("; charset=binary") and
    # structured suffixes ("+xml") must not end up in the file name.
    mime_type = content_type.split(';')[0].strip().lower()
    subtype = mime_type.partition('/')[2]
    if subtype:
        file_extension = '.' + subtype.split('+')[0]
        if file_extension == '.jpeg':
            file_extension = '.jpg'
    else:
        # No usable subtype: fall back to the extension in the URL path
        url_path = filename.replace('%3f', '?').replace('%3F', '?')  # sometimes urls are not decoded properly
        file_extension = os.path.splitext(url_path.split('?')[0])[1]

    new_filename = gibberish(15)
    directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4]
    ensure_directory_exists(directory)
    final_place = os.path.join(directory, new_filename + file_extension)
    with open(final_place, 'wb') as f:
        f.write(image_bytes)

    Image.MAX_IMAGE_PIXELS = 89478485
    try:
        with Image.open(final_place) as img:
            thumbnail = ImageOps.exif_transpose(img)
            thumbnail.thumbnail((150, 150))
            thumbnail.save(final_place)
            thumbnail_width = thumbnail.width
            thumbnail_height = thumbnail.height
    except (OSError, ValueError, Image.DecompressionBombError):
        # Not a decodable image, an unsupported extension or an oversized image:
        # remove the downloaded bytes instead of leaving an orphan file behind.
        try:
            os.remove(final_place)
        except OSError:
            pass  # nothing more can be done if the file cannot be removed
        return None

    return File(file_name=new_filename + file_extension, thumbnail_width=thumbnail_width,
                thumbnail_height=thumbnail_height, thumbnail_path=final_place,
                source_url=filename)
|
||||
|
||||
|
||||
# By no means is this a complete list, but it is very easy to search for the ones you need later.
|
||||
KNOWN_OPENGRAPH_TAGS = [
|
||||
"og:site_name",
|
||||
|
@ -980,7 +1032,7 @@ def in_sorted_list(arr, target):
|
|||
# Makes a still image from a video url, without downloading the whole video file
|
||||
def generate_image_from_video_url(video_url, output_path, length=2):
|
||||
|
||||
response = requests.get(video_url, stream=True)
|
||||
response = requests.get(video_url, stream=True, headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0'}) # Imgur requires a user agent
|
||||
content_type = response.headers.get('Content-Type')
|
||||
if content_type:
|
||||
if 'video/mp4' in content_type:
|
||||
|
|
Loading…
Add table
Reference in a new issue