detect video posts better

This commit is contained in:
rimu 2024-09-07 11:15:33 +12:00
parent 1f5788e560
commit 026a7e699c
3 changed files with 21 additions and 4 deletions

View file

@ -33,7 +33,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar
blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \
notification_subscribers, communities_banned_from, lemmy_markdown_to_html, actor_contains_blocked_words, \
html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \
moderating_communities
moderating_communities, is_video_hosting_site
from sqlalchemy import or_
@ -1933,7 +1933,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
image.alt_text = alt_text
db.session.add(image)
post.image = image
elif is_video_url(post.url):
elif is_video_url(post.url): # youtube is detected later
post.type = POST_TYPE_VIDEO
image = File(source_url=post.url)
db.session.add(image)
@ -2007,6 +2007,8 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
if post.url:
post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com
if is_video_hosting_site(post.url):
post.type = POST_TYPE_VIDEO
db.session.add(post)
post.ranking = post_ranking(post.score, post.posted_at)
community.post_count += 1

View file

@ -380,7 +380,10 @@ def save_post(form, post: Post, type: int):
db.session.add(file)
else:
# check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
opengraph = opengraph_parse(form.video_url.data)
tn_url = form.video_url.data
if tn_url[:32] == 'https://www.youtube.com/watch?v=':
tn_url = 'https://youtu.be/' + tn_url[32:43] # better chance of thumbnail from youtu.be than youtube.com
opengraph = opengraph_parse(tn_url)
if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
filename = opengraph.get('og:image') or opengraph.get('og:image:url')
if not filename.startswith('/'):

View file

@ -188,7 +188,7 @@ def is_image_url(url):
return any(path.endswith(extension) for extension in common_image_extensions)
def is_video_url(url):
def is_video_url(url: str) -> bool:
common_video_extensions = ['.mp4', '.webm']
mime_type = mime_type_using_head(url)
if mime_type:
@ -200,6 +200,18 @@ def is_video_url(url):
return any(path.endswith(extension) for extension in common_video_extensions)
def is_video_hosting_site(url: str) -> bool:
    """Return True when *url* looks like a link to a video hosting site.

    The check is purely textual: no network request is made. A URL matches
    when it starts with one of the known site prefixes below, or when it
    contains ``videos/watch`` anywhere (the PeerTube watch-page path).

    Args:
        url: The link to classify. ``None`` and the empty string are
            tolerated and are never treated as a video link.

    Returns:
        True if the URL matches a known video host pattern, otherwise False.
    """
    # Guard against missing URLs so callers need not pre-check for None/''.
    if not url:
        return False

    # Both the "www." and bare-domain forms are listed because sites such as
    # Vimeo and YouTube hand out links in either form.
    video_hosting_sites = (
        'https://www.youtube.com',
        'https://youtube.com',
        'https://youtu.be',
        'https://www.vimeo.com',
        'https://vimeo.com',
        'https://www.redgifs.com/watch/',
    )
    # str.startswith accepts a tuple, so one call replaces the manual loop.
    if url.startswith(video_hosting_sites):
        return True

    return 'videos/watch' in url  # PeerTube
@cache.memoize(timeout=10)
def mime_type_using_head(url):
# Find the mime type of a url by doing a HEAD request - this is the same as GET except only the HTTP headers are transferred