generate titles from microblog posts using the first sentence of the first paragraph

(limited to 150 chars, as before)
This commit is contained in:
rimu 2024-05-01 08:48:37 +12:00
parent 76d228f5fa
commit 4d7acb9396

View file

@ -247,28 +247,22 @@ def markdown_to_text(markdown_text) -> str:
def microblog_content_to_title(html: str) -> str:
    """Derive a post title from the HTML body of a microblog post.

    The title is the first sentence of the first ``<p>`` element: the
    paragraph's text up to (not including) its first ``'.'``. If that text
    is longer than 150 characters it is cut at the last space found within
    the first 150 characters and ``' ...'`` is appended, so a shortened
    title can be a few characters longer than 150 once the marker is added.

    Args:
        html: HTML markup of the post body.

    Returns:
        The derived title, or ``''`` when the markup contains no ``<p>``
        element (or the first paragraph's text starts with a period).
    """
    max_length = 150
    ellipsis = ' ...'

    soup = BeautifulSoup(html, 'html.parser')

    # Only the first paragraph contributes to the title.
    first_paragraph = soup.find('p')
    if first_paragraph is None:
        return ''
    title = first_paragraph.get_text()

    # Keep only the first sentence. Any '.' counts as a sentence end, so
    # text such as "3.5" or "e.g." is cut at its first period as well.
    period_index = title.find('.')
    if period_index != -1:
        title = title[:period_index]

    if len(title) > max_length:
        # Prefer to break on a word boundary inside the first max_length
        # characters (same search window as scanning indices 149..0).
        cut = title.rfind(' ', 0, max_length)
        if cut > 0:
            title = title[:cut] + ellipsis
        else:
            # No usable space (e.g. one very long word or URL): hard-cut
            # rather than discarding the title altogether.
            title = title[:max_length] + ellipsis

    return title
def community_link_to_href(link: str) -> str: