From 026a7e699cfb324664e343ba3fa3282160504b31 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Sat, 7 Sep 2024 11:15:33 +1200 Subject: [PATCH] detect video posts better --- app/activitypub/util.py | 6 ++++-- app/community/util.py | 5 ++++- app/utils.py | 14 +++++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 3078a595..7a41f7dd 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -33,7 +33,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \ notification_subscribers, communities_banned_from, lemmy_markdown_to_html, actor_contains_blocked_words, \ html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \ - moderating_communities + moderating_communities, is_video_hosting_site from sqlalchemy import or_ @@ -1933,7 +1933,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json image.alt_text = alt_text db.session.add(image) post.image = image - elif is_video_url(post.url): + elif is_video_url(post.url): # youtube is detected later post.type = POST_TYPE_VIDEO image = File(source_url=post.url) db.session.add(image) @@ -2007,6 +2007,8 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json if post.url: post.url = remove_tracking_from_link(post.url) # moved here as changes youtu.be to youtube.com + if is_video_hosting_site(post.url): + post.type = POST_TYPE_VIDEO db.session.add(post) post.ranking = post_ranking(post.score, post.posted_at) community.post_count += 1 diff --git a/app/community/util.py b/app/community/util.py index 62b9d617..ff9e279e 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -380,7 +380,10 @@ def save_post(form, post: Post, type: int): db.session.add(file) else: # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag - opengraph = opengraph_parse(form.video_url.data) + tn_url = form.video_url.data + if tn_url[:32] == 'https://www.youtube.com/watch?v=': + tn_url = 'https://youtu.be/' + tn_url[32:43] # better chance of thumbnail from youtu.be than youtube.com + opengraph = opengraph_parse(tn_url) if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): filename = opengraph.get('og:image') or opengraph.get('og:image:url') if not filename.startswith('/'): diff --git a/app/utils.py b/app/utils.py index 024836b7..d94992e8 100644 --- a/app/utils.py +++ b/app/utils.py @@ -188,7 +188,7 @@ def is_image_url(url): return any(path.endswith(extension) for extension in common_image_extensions) -def is_video_url(url): +def is_video_url(url: str) -> bool: common_video_extensions = ['.mp4', '.webm'] mime_type = mime_type_using_head(url) if mime_type: @@ -200,6 +200,18 @@ def is_video_url(url): return any(path.endswith(extension) for extension in common_video_extensions) +def is_video_hosting_site(url: str) -> bool: + video_hosting_sites = ['https://www.youtube.com', 'https://youtu.be', 'https://www.vimeo.com', 'https://www.redgifs.com/watch/'] + for starts_with in video_hosting_sites: + if url.startswith(starts_with): + return True + + if 'videos/watch' in url: # PeerTube + return True + + return False + + @cache.memoize(timeout=10) def mime_type_using_head(url): # Find the mime type of a url by doing a HEAD request - this is the same as GET except only the HTTP headers are transferred