improve robustness of nodeinfo parsing

This commit is contained in:
rimu 2024-08-18 13:12:58 +12:00
parent c4ac53e92a
commit 3cc220d1a2
3 changed files with 12 additions and 6 deletions

View file

@ -1245,10 +1245,10 @@ def new_instance_profile_task(instance_id: int):
if nodeinfo.status_code == 200: if nodeinfo.status_code == 200:
nodeinfo_json = nodeinfo.json() nodeinfo_json = nodeinfo.json()
for links in nodeinfo_json['links']: for links in nodeinfo_json['links']:
if 'rel' in links and ( if isinstance(links, dict) and 'rel' in links and (
links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or # most platforms except KBIN and Lemmy v0.19.4 links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or # most platforms except KBIN and Lemmy v0.19.4
links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or # KBIN links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or # KBIN
links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'): # Lemmy v0.19.4 (no 2.0 back-compat provided here) links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'): # Lemmy v0.19.4+ (no 2.0 back-compat provided here)
try: try:
time.sleep(0.1) time.sleep(0.1)
node = requests.get(links['href'], headers=HEADERS, timeout=5, node = requests.get(links['href'], headers=HEADERS, timeout=5,
@ -1260,7 +1260,7 @@ def new_instance_profile_task(instance_id: int):
instance.version = node_json['software']['version'] instance.version = node_json['software']['version']
instance.nodeinfo_href = links['href'] instance.nodeinfo_href = links['href']
db.session.commit() db.session.commit()
break # most platforms (except Lemmy v0.19.4) that provide 2.1 also provide 2.0 - there's no need to check both break # most platforms (except Lemmy v0.19.4) that provide 2.1 also provide 2.0 - there's no need to check both
except: except:
return return
except: except:

View file

@ -218,7 +218,7 @@ def register(app):
if nodeinfo.status_code == 200: if nodeinfo.status_code == 200:
nodeinfo_json = nodeinfo.json() nodeinfo_json = nodeinfo.json()
for links in nodeinfo_json['links']: for links in nodeinfo_json['links']:
if 'rel' in links and ( if isinstance(links, dict) and 'rel' in links and (
links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or
links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0' or
links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'): links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.1'):
@ -228,6 +228,8 @@ def register(app):
db.session.commit() db.session.commit()
sleep(0.1) sleep(0.1)
break break
else:
instance.failures += 1
elif node.status_code >= 400: elif node.status_code >= 400:
current_app.logger.info(f"{instance.domain} has no well-known/nodeinfo response") current_app.logger.info(f"{instance.domain} has no well-known/nodeinfo response")
except requests.exceptions.ReadTimeout: except requests.exceptions.ReadTimeout:
@ -239,8 +241,7 @@ def register(app):
if instance.nodeinfo_href: if instance.nodeinfo_href:
try: try:
node = requests.get(instance.nodeinfo_href, headers=HEADERS, timeout=5, node = requests.get(instance.nodeinfo_href, headers=HEADERS, timeout=5, allow_redirects=True)
allow_redirects=True)
if node.status_code == 200: if node.status_code == 200:
node_json = node.json() node_json = node.json()
if 'software' in node_json: if 'software' in node_json:
@ -250,8 +251,10 @@ def register(app):
instance.dormant = False instance.dormant = False
elif node.status_code >= 400: elif node.status_code >= 400:
instance.failures += 1 instance.failures += 1
instance.nodeinfo_href = None
except requests.exceptions.RequestException: except requests.exceptions.RequestException:
instance.failures += 1 instance.failures += 1
instance.nodeinfo_href = None
if instance.failures > 7 and instance.dormant == True: if instance.failures > 7 and instance.dormant == True:
instance.gone_forever = True instance.gone_forever = True
elif instance.failures > 2 and instance.dormant == False: elif instance.failures > 2 and instance.dormant == False:

View file

@ -81,6 +81,9 @@ class Instance(db.Model):
role = InstanceRole.query.filter_by(instance_id=self.id, user_id=user_id).first() role = InstanceRole.query.filter_by(instance_id=self.id, user_id=user_id).first()
return role and role.role == 'admin' return role and role.role == 'admin'
def __repr__(self):
    """Return a short debug representation that names this instance's domain."""
    template = '<Instance {}>'
    return template.format(self.domain)
class InstanceRole(db.Model): class InstanceRole(db.Model):
instance_id = db.Column(db.Integer, db.ForeignKey('instance.id'), primary_key=True) instance_id = db.Column(db.Integer, db.ForeignKey('instance.id'), primary_key=True)