Retry failed GET requests once with a longer timeout

This commit is contained in:
rimu 2024-09-09 08:43:10 +12:00
parent 0da0845abe
commit 1ef01d12ab
3 changed files with 17 additions and 8 deletions

View file

@ -2805,14 +2805,14 @@ def resolve_remote_post_from_search(uri: str) -> Union[Post, None]:
if not community and post_data['type'] == 'Page': # lemmy
if 'audience' in post_data:
community_id = post_data['audience']
community = Community.query.filter_by(ap_profile_id=community_id).first()
community = find_actor_or_create(community_id, community_only=True)
if not community and post_data['type'] == 'Video': # peertube
if 'attributedTo' in post_data and isinstance(post_data['attributedTo'], list):
for a in post_data['attributedTo']:
if a['type'] == 'Group':
community_id = a['id']
community = Community.query.filter_by(ap_profile_id=community_id).first()
community = find_actor_or_create(community_id, community_only=True)
if community:
break

View file

@ -7,7 +7,7 @@ from app.models import Post, Language, Community, Instance
from app.search import bp
from app.utils import moderating_communities, joined_communities, render_template, blocked_domains, blocked_instances, \
communities_banned_from, recently_upvoted_posts, recently_downvoted_posts, blocked_users, menu_topics, \
blocked_communities
blocked_communities, show_ban_message
from app.community.forms import RetrieveRemotePost
from app.activitypub.util import resolve_remote_post_from_search

View file

@ -8,6 +8,7 @@ import tempfile
import urllib
from collections import defaultdict
from datetime import datetime, timedelta, date
from time import sleep
from typing import List, Literal, Union
import markdown2
@ -81,6 +82,7 @@ def getmtime(filename):
# do a GET request to a uri, return the result
def get_request(uri, params=None, headers=None) -> requests.Response:
timeout = 15 if 'washingtonpost.com' in uri else 5 # Washington Post is really slow on og:image for some reason
if headers is None:
headers = {'User-Agent': 'PieFed/1.0'}
else:
@ -90,7 +92,6 @@ def get_request(uri, params=None, headers=None) -> requests.Response:
else:
payload_str = urllib.parse.urlencode(params) if params else None
try:
timeout = 15 if 'washingtonpost.com' in uri else 5 # Washington Post is really slow on og:image for some reason
response = requests.get(uri, params=payload_str, headers=headers, timeout=timeout, allow_redirects=True)
except requests.exceptions.SSLError as invalid_cert:
# Not our problem if the other end doesn't have proper SSL
@ -100,11 +101,19 @@ def get_request(uri, params=None, headers=None) -> requests.Response:
# Convert to a more generic error we handle
raise requests.exceptions.RequestException(f"InvalidCodepoint: {str(ex)}") from None
except requests.exceptions.ReadTimeout as read_timeout:
current_app.logger.info(f"{uri} {read_timeout}")
raise requests.exceptions.ReadTimeout from read_timeout
try: # retry, this time with a longer timeout
sleep(random.randint(3, 10))
response = requests.get(uri, params=payload_str, headers=headers, timeout=timeout * 2, allow_redirects=True)
except Exception as e:
current_app.logger.info(f"{uri} {read_timeout}")
raise requests.exceptions.ReadTimeout from read_timeout
except requests.exceptions.ConnectionError as connection_error:
current_app.logger.info(f"{uri} {connection_error}")
raise requests.exceptions.ConnectionError from connection_error
try: # retry, this time with a longer timeout
sleep(random.randint(3, 10))
response = requests.get(uri, params=payload_str, headers=headers, timeout=timeout * 2, allow_redirects=True)
except Exception as e:
current_app.logger.info(f"{uri} {connection_error}")
raise requests.exceptions.ConnectionError from connection_error
return response