From 3cc220d1a2a5c5497655c8b5b7660245edaea679 Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Sun, 18 Aug 2024 13:12:58 +1200 Subject: [PATCH] improve robustness of nodeinfo parsing --- app/activitypub/util.py | 6 +++--- app/cli.py | 9 ++++++--- app/models.py | 3 +++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index bbda307a..dfeca8f1 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -1245,10 +1245,10 @@ def new_instance_profile_task(instance_id: int): if nodeinfo.status_code == 200: nodeinfo_json = nodeinfo.json() for links in nodeinfo_json['links']: - if 'rel' in links and ( + if isinstance(links, dict) and 'rel' in links and ( links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or # most platforms except KBIN and Lemmy v0.19.4 links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or # KBIN - links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'): # Lemmy v0.19.4 (no 2.0 back-compat provided here) + links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'): # Lemmy v0.19.4+ (no 2.0 back-compat provided here) try: time.sleep(0.1) node = requests.get(links['href'], headers=HEADERS, timeout=5, @@ -1260,7 +1260,7 @@ def new_instance_profile_task(instance_id: int): instance.version = node_json['software']['version'] instance.nodeinfo_href = links['href'] db.session.commit() - break # most platforms (except Lemmy v0.19.4) that provide 2.1 also provide 2.0 - there's no need to check both + break # most platforms (except Lemmy v0.19.4) that provide 2.1 also provide 2.0 - there's no need to check both except: return except: diff --git a/app/cli.py b/app/cli.py index ddedb795..02ddb2dd 100644 --- a/app/cli.py +++ b/app/cli.py @@ -218,7 +218,7 @@ def register(app): if nodeinfo.status_code == 200: nodeinfo_json = nodeinfo.json() for links in nodeinfo_json['links']: - if 'rel' in links and ( + if 
isinstance(links, dict) and 'rel' in links and ( links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'): @@ -228,6 +228,8 @@ def register(app): db.session.commit() sleep(0.1) break + else: + instance.failures += 1 elif node.status_code >= 400: current_app.logger.info(f"{instance.domain} has no well-known/nodeinfo response") except requests.exceptions.ReadTimeout: @@ -239,8 +241,7 @@ def register(app): if instance.nodeinfo_href: try: - node = requests.get(instance.nodeinfo_href, headers=HEADERS, timeout=5, - allow_redirects=True) + node = requests.get(instance.nodeinfo_href, headers=HEADERS, timeout=5, allow_redirects=True) if node.status_code == 200: node_json = node.json() if 'software' in node_json: @@ -250,8 +251,10 @@ def register(app): instance.dormant = False elif node.status_code >= 400: instance.failures += 1 + instance.nodeinfo_href = None except requests.exceptions.RequestException: instance.failures += 1 + instance.nodeinfo_href = None if instance.failures > 7 and instance.dormant == True: instance.gone_forever = True elif instance.failures > 2 and instance.dormant == False: diff --git a/app/models.py b/app/models.py index 26d630c2..f6a2939a 100644 --- a/app/models.py +++ b/app/models.py @@ -81,6 +81,9 @@ class Instance(db.Model): role = InstanceRole.query.filter_by(instance_id=self.id, user_id=user_id).first() return role and role.role == 'admin' + def __repr__(self): + return '<Instance {}>'.format(self.domain) + class InstanceRole(db.Model): instance_id = db.Column(db.Integer, db.ForeignKey('instance.id'), primary_key=True)