detect video posts better

2025-01-23 19:36:56 -08:00 · 2024-09-07 11:15:33 +12:00 · 2024-09-07 11:15:33 +12:00 · 026a7e699c
commit 026a7e699c
parent 1f5788e560
3 changed files with 21 additions and 4 deletions
--- a/app/activitypub/util.py
+++ b/app/activitypub/util.py
@ -33,7 +33,7 @@ from app.utils import get_request, allowlist_html, get_setting, ap_datetime, mar
    blocked_phrases, microblog_content_to_title, generate_image_from_video_url, is_video_url, reply_is_stupid, \
    notification_subscribers, communities_banned_from, lemmy_markdown_to_html, actor_contains_blocked_words, \
    html_to_text, opengraph_parse, url_to_thumbnail_file, add_to_modlog_activitypub, joined_communities, \
-    moderating_communities
+    moderating_communities, is_video_hosting_site

 from sqlalchemy import or_

@ -1933,7 +1933,7 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json
                        image.alt_text = alt_text
                db.session.add(image)
                post.image = image
-            elif is_video_url(post.url):
+            elif is_video_url(post.url):    # youtube is detected later
                post.type = POST_TYPE_VIDEO
                image = File(source_url=post.url)
                db.session.add(image)
@ -2007,6 +2007,8 @@ def create_post(activity_log: ActivityPubLog, community: Community, request_json

        if post.url:
            post.url = remove_tracking_from_link(post.url)      # moved here as changes youtu.be to youtube.com
+        if is_video_hosting_site(post.url):
+            post.type = POST_TYPE_VIDEO
        db.session.add(post)
        post.ranking = post_ranking(post.score, post.posted_at)
        community.post_count += 1
--- a/app/community/util.py
+++ b/app/community/util.py
@ -380,7 +380,10 @@ def save_post(form, post: Post, type: int):
                db.session.add(file)
            else:
                # check opengraph tags on the page and make a thumbnail if an image is available in the og:image meta tag
-                opengraph = opengraph_parse(form.video_url.data)
+                tn_url = form.video_url.data
+                if tn_url[:32] == 'https://www.youtube.com/watch?v=':
+                    tn_url = 'https://youtu.be/' + tn_url[32:43]  # better chance of thumbnail from youtu.be than youtube.com
+                opengraph = opengraph_parse(tn_url)
                if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''):
                    filename = opengraph.get('og:image') or opengraph.get('og:image:url')
                    if not filename.startswith('/'):
--- a/app/utils.py
+++ b/app/utils.py
@ -188,7 +188,7 @@ def is_image_url(url):
        return any(path.endswith(extension) for extension in common_image_extensions)


-def is_video_url(url):
+def is_video_url(url: str) -> bool:
    common_video_extensions = ['.mp4', '.webm']
    mime_type = mime_type_using_head(url)
    if mime_type:
@ -200,6 +200,18 @@ def is_video_url(url):
        return any(path.endswith(extension) for extension in common_video_extensions)


+def is_video_hosting_site(url: str) -> bool:
+    """Return True if url starts with a known video hosting site prefix or contains a PeerTube watch path.
+
+    A missing url (None or '') returns False instead of raising AttributeError.
+    """
+    if not url:     # create_post() passes post.url without a guard, and not every post has a link
+        return False
+    video_hosting_sites = ('https://www.youtube.com', 'https://youtu.be', 'https://vimeo.com', 'https://www.vimeo.com', 'https://www.redgifs.com/watch/')
+    # str.startswith() accepts a tuple of prefixes; 'videos/watch' anywhere in the url is treated as PeerTube
+    return url.startswith(video_hosting_sites) or 'videos/watch' in url
+
+
@cache.memoize(timeout=10)
 def mime_type_using_head(url):
    # Find the mime type of a url by doing a HEAD request - this is the same as GET except only the HTTP headers are transferred