From 8137c6b4b1bbb6fee639a979150fc14a190c8f9d Mon Sep 17 00:00:00 2001
From: freamon
Date: Mon, 17 Jun 2024 16:54:27 +0100
Subject: [PATCH] Simplify regex for raw URLs in text

---
 app/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/utils.py b/app/utils.py
index 3376e228..da5c24a8 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -188,7 +188,7 @@ def allowlist_html(html: str) -> str:
     soup = BeautifulSoup(html, 'html.parser')
 
     # Find all plain text links, convert to tags
-    re_url = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
+    re_url = re.compile(r'(http[s]?://[!-~]+)') # http(s):// followed by chars in ASCII range 33 to 126
     for tag in soup.find_all(text=True):
         tags = []
         url = False