From 424f8e004f8141dc3d2550c8def43799bfa53590 Mon Sep 17 00:00:00 2001
From: freamon
Date: Fri, 10 May 2024 15:46:16 +0100
Subject: [PATCH] Handle extraneous html/text when generating titles for
 microblog content

---
 app/utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/app/utils.py b/app/utils.py
index 196e6888..7228a49d 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -250,9 +250,10 @@ def microblog_content_to_title(html: str) -> str:
 
     title = ''
     for tag in soup.find_all('p'):
-        title = tag.get_text()
+        title = tag.get_text(separator=" ")
         break
     else:
+        html = html.replace('<', '.', 1)
         title = shorten_string(html, 160)
 
     period_index = title.find('.')
@@ -270,9 +271,9 @@ def microblog_content_to_title(html: str) -> str:
         return title
 
     if end_index != -1:
-        if question_index != -1:
+        if question_index != -1 and question_index == end_index:
             end_index += 1  # Add the ? back on
-        if exclamation_index != -1:
+        if exclamation_index != -1 and exclamation_index == end_index:
             end_index += 1  # Add the ! back on
         title = title[:end_index]
 