add bypass paywall links for several sites #205

This commit is contained in:
rimu 2024-06-16 17:55:08 +08:00
parent b578bf27a4
commit 3f2690a42b
3 changed files with 31 additions and 2 deletions

View file

@ -15,7 +15,8 @@ from app.community.util import save_post, send_to_remote_instance
from app.inoculation import inoculation from app.inoculation import inoculation
from app.post.forms import NewReplyForm, ReportPostForm, MeaCulpaForm from app.post.forms import NewReplyForm, ReportPostForm, MeaCulpaForm
from app.community.forms import CreateLinkForm, CreateImageForm, CreateDiscussionForm, CreateVideoForm, CreatePollForm from app.community.forms import CreateLinkForm, CreateImageForm, CreateDiscussionForm, CreateVideoForm, CreatePollForm
from app.post.util import post_replies, get_comment_branch, post_reply_count, tags_to_string from app.post.util import post_replies, get_comment_branch, post_reply_count, tags_to_string, url_has_paywall, \
generate_paywall_bypass_link
from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, SUBSCRIPTION_MODERATOR, POST_TYPE_LINK, \ from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, SUBSCRIPTION_MODERATOR, POST_TYPE_LINK, \
POST_TYPE_IMAGE, \ POST_TYPE_IMAGE, \
POST_TYPE_ARTICLE, POST_TYPE_VIDEO, NOTIF_REPLY, NOTIF_POST, POST_TYPE_POLL POST_TYPE_ARTICLE, POST_TYPE_VIDEO, NOTIF_REPLY, NOTIF_POST, POST_TYPE_POLL
@ -299,6 +300,11 @@ def show_post(post_id: int):
else: else:
poll_form = True poll_form = True
# Bypass paywalls link
bypass_paywall_link = None
if post.type == POST_TYPE_LINK and 'https://archive.' not in post.body_html and url_has_paywall(post.url):
bypass_paywall_link = generate_paywall_bypass_link(post.url)
response = render_template('post/post.html', title=post.title, post=post, is_moderator=is_moderator, community=post.community, response = render_template('post/post.html', title=post.title, post=post, is_moderator=is_moderator, community=post.community,
breadcrumbs=breadcrumbs, related_communities=related_communities, mods=mod_list, breadcrumbs=breadcrumbs, related_communities=related_communities, mods=mod_list,
poll_form=poll_form, poll_results=poll_results, poll_data=poll_data, poll_choices=poll_choices, poll_total_votes=poll_total_votes, poll_form=poll_form, poll_results=poll_results, poll_data=poll_data, poll_choices=poll_choices, poll_total_votes=poll_total_votes,
@ -306,7 +312,7 @@ def show_post(post_id: int):
description=description, og_image=og_image, POST_TYPE_IMAGE=constants.POST_TYPE_IMAGE, description=description, og_image=og_image, POST_TYPE_IMAGE=constants.POST_TYPE_IMAGE,
POST_TYPE_LINK=constants.POST_TYPE_LINK, POST_TYPE_ARTICLE=constants.POST_TYPE_ARTICLE, POST_TYPE_LINK=constants.POST_TYPE_LINK, POST_TYPE_ARTICLE=constants.POST_TYPE_ARTICLE,
POST_TYPE_VIDEO=constants.POST_TYPE_VIDEO, POST_TYPE_POLL=constants.POST_TYPE_POLL, POST_TYPE_VIDEO=constants.POST_TYPE_VIDEO, POST_TYPE_POLL=constants.POST_TYPE_POLL,
autoplay=request.args.get('autoplay', False), autoplay=request.args.get('autoplay', False), bypass_paywall_link=bypass_paywall_link,
noindex=not post.author.indexable, preconnect=post.url if post.url else None, noindex=not post.author.indexable, preconnect=post.url if post.url else None,
recently_upvoted=recently_upvoted, recently_downvoted=recently_downvoted, recently_upvoted=recently_upvoted, recently_downvoted=recently_downvoted,
recently_upvoted_replies=recently_upvoted_replies, recently_downvoted_replies=recently_downvoted_replies, recently_upvoted_replies=recently_upvoted_replies, recently_downvoted_replies=recently_downvoted_replies,

View file

@ -1,4 +1,5 @@
from typing import List from typing import List
from urllib.parse import urlparse
from flask_login import current_user from flask_login import current_user
from sqlalchemy import desc, text, or_ from sqlalchemy import desc, text, or_
@ -78,3 +79,22 @@ def post_reply_count(post_id) -> int:
def tags_to_string(post: Post) -> str:
    """Return the names of the post's tags joined with ', '.

    When the post has no tags nothing is joined and None is returned, which
    is what the original implicit fall-through produced.
    """
    if post.tags.count() <= 0:
        return None
    tag_names = (tag.name for tag in post.tags)
    return ', '.join(tag_names)
# Hostnames (without a leading "www.") of sites treated as paywalled.
# Built once at import time so membership tests are O(1).
_PAYWALLED_SITES = frozenset({
    'washingtonpost.com', 'wapo.st', 'nytimes.com', 'wsj.com', 'economist.com', 'ft.com',
    'telegraph.co.uk', 'bild.de', 'theatlantic.com', 'lemonde.fr',
})


def url_has_paywall(url) -> bool:
    """Return True if *url* points at one of the known paywalled sites.

    The URL's hostname is compared (exact match, case-insensitively, ignoring
    one leading ``www.``) against the list of paywalled sites. Other
    subdomains are not matched.

    :param url: absolute URL of the link; may be None or empty.
    :return: True when the hostname is a known paywalled site, otherwise
             False. Missing, non-string or unparseable URLs yield False.
    """
    if not url or not isinstance(url, str):
        return False
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unbalanced IPv6 bracket)
        return False
    if not hostname:
        # No network location, e.g. a URL without a scheme
        return False
    hostname = hostname.lower()
    # Strip "www." only as a hostname prefix. Removing it from the whole URL
    # string would also rewrite unrelated hosts and paths.
    if hostname.startswith('www.'):
        hostname = hostname[len('www.'):]
    return hostname in _PAYWALLED_SITES
def generate_paywall_bypass_link(url) -> str:
    """Build an archive.ph link for *url*.

    The full original URL, scheme included, is appended to the archive.ph
    base address.

    :param url: absolute URL of the paywalled page (a non-empty string).
    :return: the archive.ph URL as a string.
    """
    return 'https://archive.ph/' + url

View file

@ -144,6 +144,9 @@
{% endif -%} {% endif -%}
<div class="post_body"> <div class="post_body">
{{ post.body_html|community_links|safe if post.body_html else '' }} {{ post.body_html|community_links|safe if post.body_html else '' }}
{% if bypass_paywall_link -%}
<p><a href="{{ bypass_paywall_link }}">{{ _('Bypass paywall') }}</a></p>
{% endif -%}
</div> </div>
{% if post.type == POST_TYPE_POLL -%} {% if post.type == POST_TYPE_POLL -%}
<div class="post_poll"> <div class="post_poll">