From e2ce7c832f6ff1bd364a6b73b69fc2b8380bad41 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:46:36 +0800 Subject: [PATCH] better image handling - use content-type header instead of extension #fixes 37 --- app/community/util.py | 58 ++++++++++++++++++++++++------------------- app/utils.py | 37 ++++++++++++++++++++++----- 2 files changed, 64 insertions(+), 31 deletions(-) diff --git a/app/community/util.py b/app/community/util.py index decfe47b..225bfeb5 100644 --- a/app/community/util.py +++ b/app/community/util.py @@ -253,27 +253,39 @@ def opengraph_parse(url): def url_to_thumbnail_file(filename) -> File: - filename_for_extension = filename.split('?')[0] if '?' in filename else filename - unused, file_extension = os.path.splitext(filename_for_extension) response = requests.get(filename, timeout=5) if response.status_code == 200: - new_filename = gibberish(15) - directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4] - ensure_directory_exists(directory) - final_place = os.path.join(directory, new_filename + file_extension) - with open(final_place, 'wb') as f: - f.write(response.content) - response.close() - Image.MAX_IMAGE_PIXELS = 89478485 - with Image.open(final_place) as img: - img = ImageOps.exif_transpose(img) - img.thumbnail((150, 150)) - img.save(final_place) - thumbnail_width = img.width - thumbnail_height = img.height - return File(file_name=new_filename + file_extension, thumbnail_width=thumbnail_width, - thumbnail_height=thumbnail_height, thumbnail_path=final_place, - source_url=filename) + content_type = response.headers.get('content-type') + if content_type and content_type.startswith('image'): + # Generate file extension from mime type + content_type_parts = content_type.split('/') + if content_type_parts: + file_extension = '.' + content_type_parts[-1] + if file_extension == '.jpeg': + file_extension = '.jpg' + else: + file_extension = os.path.splitext(filename)[1] + file_extension = file_extension.replace('%3f', '?') # sometimes urls are not decoded properly + if '?' in file_extension: + file_extension = file_extension.split('?')[0] + + new_filename = gibberish(15) + directory = 'app/static/media/posts/' + new_filename[0:2] + '/' + new_filename[2:4] + ensure_directory_exists(directory) + final_place = os.path.join(directory, new_filename + file_extension) + with open(final_place, 'wb') as f: + f.write(response.content) + response.close() + Image.MAX_IMAGE_PIXELS = 89478485 + with Image.open(final_place) as img: + img = ImageOps.exif_transpose(img) + img.thumbnail((150, 150)) + img.save(final_place) + thumbnail_width = img.width + thumbnail_height = img.height + return File(file_name=new_filename + file_extension, thumbnail_width=thumbnail_width, + thumbnail_height=thumbnail_height, thumbnail_path=final_place, + source_url=filename) def save_post(form, post: Post, type: str): @@ -322,9 +334,7 @@ def save_post(form, post: Post, type: str): opengraph = opengraph_parse(form.link_url.data) if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): filename = opengraph.get('og:image') or opengraph.get('og:image:url') - filename_for_extension = filename.split('?')[0] if '?' in filename else filename - unused, file_extension = os.path.splitext(filename_for_extension) - if file_extension.lower() in allowed_extensions and not filename.startswith('/'): + if not filename.startswith('/'): file = url_to_thumbnail_file(filename) if file: file.alt_text = shorten_string(opengraph.get('og:title'), 295) @@ -415,9 +425,7 @@ def save_post(form, post: Post, type: str): opengraph = opengraph_parse(form.video_url.data) if opengraph and (opengraph.get('og:image', '') != '' or opengraph.get('og:image:url', '') != ''): filename = opengraph.get('og:image') or opengraph.get('og:image:url') - filename_for_extension = filename.split('?')[0] if '?' in filename else filename - unused, file_extension = os.path.splitext(filename_for_extension) - if file_extension.lower() in allowed_extensions and not filename.startswith('/'): + if not filename.startswith('/'): file = url_to_thumbnail_file(filename) if file: file.alt_text = shorten_string(opengraph.get('og:title'), 295) diff --git a/app/utils.py b/app/utils.py index da5c24a8..a8166e7b 100644 --- a/app/utils.py +++ b/app/utils.py @@ -165,17 +165,42 @@ def gibberish(length: int = 10) -> str: def is_image_url(url): - parsed_url = urlparse(url) - path = parsed_url.path.lower() common_image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'] - return any(path.endswith(extension) for extension in common_image_extensions) + mime_type = mime_type_using_head(url) + if mime_type: + mime_type_parts = mime_type.split('/') + return f'.{mime_type_parts[1]}' in common_image_extensions + else: + parsed_url = urlparse(url) + path = parsed_url.path.lower() + return any(path.endswith(extension) for extension in common_image_extensions) def is_video_url(url): - parsed_url = urlparse(url) - path = parsed_url.path.lower() common_video_extensions = ['.mp4', '.webm'] - return any(path.endswith(extension) for extension in common_video_extensions) + mime_type = mime_type_using_head(url) + if mime_type: + mime_type_parts = mime_type.split('/') + return f'.{mime_type_parts[1]}' in common_video_extensions + else: + parsed_url = urlparse(url) + path = parsed_url.path.lower() + return any(path.endswith(extension) for extension in common_video_extensions) + + +@cache.memoize(timeout=10) +def mime_type_using_head(url): + # Find the mime type of a url by doing a HEAD request - this is the same as GET except only the HTTP headers are transferred + try: + response = requests.head(url) + response.raise_for_status() # Raise an exception for HTTP errors + content_type = response.headers.get('Content-Type') + if content_type: + return content_type + else: + return '' + except requests.exceptions.RequestException as e: + return '' # sanitise HTML using an allow list