From 4d7acb9396886066176eb6286bd2988977742dc5 Mon Sep 17 00:00:00 2001
From: rimu <3310831+rimu@users.noreply.github.com>
Date: Wed, 1 May 2024 08:48:37 +1200
Subject: [PATCH] generate titles from microblog posts using the first sentence of the first paragraph (limited to 150 chars, as before)

---
 app/utils.py | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/app/utils.py b/app/utils.py
index a8a522e8..2b6621e2 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -247,28 +247,22 @@ def markdown_to_text(markdown_text) -> str:
 
 def microblog_content_to_title(html: str) -> str:
     soup = BeautifulSoup(html, 'html.parser')
-    title_found = False
-    for tag in soup.find_all():
-        if tag.name == 'p':
-            if not title_found:
-                title_found = True
-                continue
-            else:
-                tag = tag.extract()
-        else:
-            tag = tag.extract()
+    title = ''
+    for tag in soup.find_all('p'):
+        title = tag.get_text()
+        break
 
-    if title_found:
-        result = soup.text
-        if len(result) > 150:
-            for i in range(149, -1, -1):
-                if result[i] == ' ':
-                    break;
-            result = result[:i] + ' ...' if i > 0 else ''
-    else:
-        result = ''
+    period_index = title.find('.')
+    if period_index != -1:
+        title = title[:period_index]
 
-    return result
+    if len(title) > 150:
+        for i in range(149, -1, -1):
+            if title[i] == ' ':
+                break
+        title = title[:i] + ' ...' if i > 0 else ''
+
+    return title
 
 
 def community_link_to_href(link: str) -> str: