# Post-patch form of remove_tracking_from_link() from app/utils.py, taken from
# commit 01423586b555f0eca6a7f868b1f4590aba89d548 ("refactor
# remove_tracking_from_link to preserve the t parameter in youtube urls,
# without regex #34").
from urllib.parse import parse_qs, urlencode, urlparse


def remove_tracking_from_link(url):
    """Strip tracking parameters from ``youtu.be`` share links.

    For a URL whose host is ``youtu.be`` (compared case-insensitively), the
    result is rebuilt as ``https://youtu.be/<video id>``, keeping only the
    ``t`` (start time) query parameter when one is present. Everything else
    in the query string, such as ``si=...``, is dropped.

    Any other URL, and any input that ``urlparse`` cannot parse, is returned
    unchanged.

    :param url: the link to clean
    :return: the cleaned link, or ``url`` itself when no cleaning applies
    """
    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed input (e.g. an unbalanced '[' in the
        # host). This function is a best-effort cleaner, so hand such input
        # back untouched instead of propagating the error to the caller.
        return url

    # Hostnames are case-insensitive, so normalise before comparing.
    if parsed_url.netloc.lower() != 'youtu.be':
        return url

    # The video ID is the path without its leading slash.
    video_id = parsed_url.path[1:]
    cleaned_url = f"https://youtu.be/{video_id}"

    # Preserve only the 't' parameter; every other parameter is discarded.
    t_values = parse_qs(parsed_url.query).get('t')
    if t_values:
        cleaned_url += f"?{urlencode({'t': t_values}, doseq=True)}"

    return cleaned_url