add bypass paywall links for several sites #205

2025-02-03 00:31:25 -08:00 · 2024-06-16 19:03:23 +08:00 · 2024-06-16 19:03:23 +08:00 · 811c9eb5f3
commit 811c9eb5f3
parent 3f2690a42b
2 changed files with 7 additions and 4 deletions
--- a/app/post/routes.py
+++ b/app/post/routes.py
@@ -16,7 +16,7 @@ from app.inoculation import inoculation
 from app.post.forms import NewReplyForm, ReportPostForm, MeaCulpaForm
 from app.community.forms import CreateLinkForm, CreateImageForm, CreateDiscussionForm, CreateVideoForm, CreatePollForm
 from app.post.util import post_replies, get_comment_branch, post_reply_count, tags_to_string, url_has_paywall, \
-    generate_paywall_bypass_link
+    generate_paywall_bypass_link, body_has_no_paywall_link
 from app.constants import SUBSCRIPTION_MEMBER, SUBSCRIPTION_OWNER, SUBSCRIPTION_MODERATOR, POST_TYPE_LINK, \
    POST_TYPE_IMAGE, \
    POST_TYPE_ARTICLE, POST_TYPE_VIDEO, NOTIF_REPLY, NOTIF_POST, POST_TYPE_POLL
@@ -302,7 +302,7 @@ def show_post(post_id: int):

    # Bypass paywalls link
    bypass_paywall_link = None
-    if post.type == POST_TYPE_LINK and 'https://archive.' not in post.body_html and url_has_paywall(post.url):
+    if post.type == POST_TYPE_LINK and body_has_no_paywall_link(post.body_html) and url_has_paywall(post.url):
        bypass_paywall_link = generate_paywall_bypass_link(post.url)

    response = render_template('post/post.html', title=post.title, post=post, is_moderator=is_moderator, community=post.community,
--- a/app/post/util.py
+++ b/app/post/util.py
@@ -81,9 +81,13 @@ def tags_to_string(post: Post) -> str:
        return ', '.join([tag.name for tag in post.tags])


+def body_has_no_paywall_link(body):
+    return 'https://archive.' not in body and 'https://12ft.io' not in body
+
+
 def url_has_paywall(url) -> bool:
    paywalled_sites = ['washingtonpost.com', 'wapo.st', 'nytimes.com', 'wsj.com', 'economist.com', 'ft.com', 'telegraph.co.uk',
-                       'bild.de', 'theatlantic.com', 'lemonde.fr']
+                       'bild.de', 'theatlantic.com', 'lemonde.fr', 'nzherald.co.nz']
    if url:
        try:
            parsed_url = urlparse(url.replace('www.', ''))
@@ -96,5 +100,4 @@ def url_has_paywall(url) -> bool:


 def generate_paywall_bypass_link(url) -> bool:
-    url_without_protocol = url.replace('https://', '').replace('http://', '')
    return 'https://archive.ph/' + url