From 9be6ae18f8191a4bd6b5befd2ebfab8c2e4b8fed Mon Sep 17 00:00:00 2001 From: freamon Date: Sat, 16 Mar 2024 20:17:24 +0000 Subject: [PATCH 1/3] Preserve case for fetching actors (req. for KBIN) --- app/activitypub/util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index dfbee16c..aa1ae32f 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -200,6 +200,7 @@ def instance_allowed(host: str) -> bool: def find_actor_or_create(actor: str, create_if_not_found=True, community_only=False) -> Union[User, Community, None]: + actor_url = actor.strip() actor = actor.strip().lower() user = None # actor parameter must be formatted as https://server/u/actor or https://server/c/actor @@ -244,10 +245,10 @@ def find_actor_or_create(actor: str, create_if_not_found=True, community_only=Fa if create_if_not_found: if actor.startswith('https://'): try: - actor_data = get_request(actor, headers={'Accept': 'application/activity+json'}) + actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'}) except requests.exceptions.ReadTimeout: time.sleep(randint(3, 10)) - actor_data = get_request(actor, headers={'Accept': 'application/activity+json'}) + actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'}) if actor_data.status_code == 200: actor_json = actor_data.json() actor_data.close() From 90a89c47e2ec77c92184af6ca582d9ccac25dfc5 Mon Sep 17 00:00:00 2001 From: freamon Date: Sat, 16 Mar 2024 20:33:48 +0000 Subject: [PATCH 2/3] Increase exception catching when fetching actors --- app/activitypub/util.py | 7 ++++++- app/utils.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index aa1ae32f..4f38ca7c 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -248,7 +248,12 @@ def find_actor_or_create(actor: str, create_if_not_found=True, community_only=Fa actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'}) except requests.exceptions.ReadTimeout: time.sleep(randint(3, 10)) - actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'}) + try: + actor_data = get_request(actor_url, headers={'Accept': 'application/activity+json'}) + except requests.exceptions.ReadTimeout: + return None + except requests.exceptions.ConnectionError: + return None if actor_data.status_code == 200: actor_json = actor_data.json() actor_data.close() diff --git a/app/utils.py b/app/utils.py index e6ca7c59..ea34351b 100644 --- a/app/utils.py +++ b/app/utils.py @@ -91,6 +91,9 @@ def get_request(uri, params=None, headers=None) -> requests.Response: except requests.exceptions.ReadTimeout as read_timeout: current_app.logger.info(f"{uri} {read_timeout}") raise requests.exceptions.ReadTimeout from read_timeout + except requests.exceptions.ConnectionError as connection_error: + current_app.logger.info(f"{uri} {connection_error}") + raise requests.exceptions.ConnectionError from connection_error return response From fcdc5afbf40ae52c07fa994d5c012efea1bf01c8 Mon Sep 17 00:00:00 2001 From: freamon Date: Sat, 16 Mar 2024 20:39:16 +0000 Subject: [PATCH 3/3] Ignore spurious warning from BeautifulSoup --- app/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/utils.py b/app/utils.py index ea34351b..47feb13c 100644 --- a/app/utils.py +++ b/app/utils.py @@ -13,7 +13,9 @@ import math from urllib.parse import urlparse, parse_qs, urlencode from functools import wraps import flask -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup, NavigableString, MarkupResemblesLocatorWarning +import warnings +warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) import requests import os from flask import current_app, json, redirect, url_for, request, make_response, Response, g