refresh instance stats during daily maintenance #188

rimu 2024-05-21 18:07:07 +12:00
parent 32de8aad9f
commit 6495169659
4 changed files with 85 additions and 5 deletions

View file

@@ -312,7 +312,7 @@ def find_actor_or_create(actor: str, create_if_not_found=True, community_only=Fa
                    refresh_user_profile(user.id)
                elif isinstance(user, Community):
                    refresh_community_profile(user.id)
                    refresh_instance_profile(user.instance_id)
                    # refresh_instance_profile(user.instance_id)  # disabled in favour of cron job - see app.cli.daily_maintenance()
        return user
    else:  # User does not exist in the DB, it's going to need to be created from its remote home instance
        if create_if_not_found:
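The per-actor refresh_instance_profile() call is commented out here because instance details are now refreshed in bulk by the daily-maintenance CLI command added in the next file, intended to run once a day (for example from cron via "flask daily-maintenance"). As a rough sketch only, the same command can also be invoked through Flask's test CLI runner; "create_app" is an assumed name for the project's application factory and does not appear in this diff:

# Hedged sketch: run the new command through Flask's test CLI runner instead of cron.
# "create_app" is an assumption about the app factory's name, not part of this commit.
from app import create_app

app = create_app()
runner = app.test_cli_runner()
result = runner.invoke(args=['daily-maintenance'])
print(result.exit_code)  # 0 when the maintenance run completed without raising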

View file

@@ -3,8 +3,10 @@
import imaplib
import re
from datetime import datetime, timedelta
from time import sleep
import flask
import requests
from flask import json, current_app
from flask_babel import _
from sqlalchemy import or_, desc, text
@@ -165,16 +167,56 @@ def register(app):
    @app.cli.command('daily-maintenance')
    def daily_maintenance():
        with app.app_context():
            """Remove activity older than 3 days"""
            # Remove activity older than 3 days
            db.session.query(ActivityPubLog).filter(ActivityPubLog.created_at < utcnow() - timedelta(days=3)).delete()
            db.session.commit()
            # Ensure accurate count of posts associated with each hashtag
            for tag in Tag.query.all():
                post_count = db.session.execute(text('SELECT COUNT(post_id) as c FROM "post_tag" WHERE tag_id = :tag_id'),
                                                { 'tag_id': tag.id}).scalar()
                tag.post_count = post_count
            db.session.commit()
            # Check for dormant or dead instances
            instances = Instance.query.filter(Instance.gone_forever == False, Instance.id != 1).all()
            HEADERS = {'User-Agent': 'PieFed/1.0', 'Accept': 'application/activity+json'}
            for instance in instances:
                try:
                    nodeinfo = requests.get(f"https://{instance.domain}/.well-known/nodeinfo", headers=HEADERS,
                                            timeout=5, allow_redirects=True)
                    if nodeinfo.status_code == 200:
                        nodeinfo_json = nodeinfo.json()
                        for links in nodeinfo_json['links']:
                            if 'rel' in links and (
                                    links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or
                                    links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0'):
                                try:
                                    sleep(0.1)
                                    node = requests.get(links['href'], headers=HEADERS, timeout=5,
                                                        allow_redirects=True)
                                    if node.status_code == 200:
                                        node_json = node.json()
                                        if 'software' in node_json:
                                            instance.software = node_json['software']['name']
                                            instance.version = node_json['software']['version']
                                            instance.failures = 0
                                            instance.dormant = False
                                    elif node.status_code >= 400:
                                        instance.failures += 1
                                except:
                                    instance.failures += 1
                    elif nodeinfo.status_code >= 400:
                        instance.failures += 1
                except:
                    instance.failures += 1
                if instance.failures > 7 and instance.dormant == True:
                    instance.gone_forever = True
                elif instance.failures > 2 and instance.dormant == False:
                    instance.dormant = True
            db.session.commit()
    @app.cli.command("spaceusage")
    def spaceusage():
        with app.app_context():
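The dormancy check above follows the usual two-step NodeInfo discovery: fetch /.well-known/nodeinfo, pick the link whose rel is the diaspora 2.0 schema, then fetch that link and read software.name and software.version. A minimal standalone sketch of the same flow; the helper name and example domain are illustrative and not part of the commit:

# Illustrative helper mirroring the NodeInfo lookup performed in daily_maintenance().
import requests

HEADERS = {'User-Agent': 'PieFed/1.0', 'Accept': 'application/activity+json'}

def fetch_software(domain: str):
    """Return (software_name, version) advertised by a remote instance, or None."""
    well_known = requests.get(f"https://{domain}/.well-known/nodeinfo",
                              headers=HEADERS, timeout=5, allow_redirects=True)
    if well_known.status_code != 200:
        return None
    for link in well_known.json().get('links', []):
        if link.get('rel', '').endswith('nodeinfo.diaspora.software/ns/schema/2.0'):
            node = requests.get(link['href'], headers=HEADERS, timeout=5, allow_redirects=True)
            if node.status_code == 200:
                software = node.json().get('software')
                if software:
                    return software['name'], software['version']
    return None

# e.g. fetch_software('lemmy.ml') might return something like ('lemmy', '0.19.3')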

View file

@@ -2,6 +2,7 @@ import os.path
from datetime import datetime, timedelta
from math import log
from random import randint
from time import sleep
import flask
import markdown2
@@ -320,8 +321,45 @@ def list_files(directory):
@bp.route('/test')
def test():
    test_html = ''
    return microblog_content_to_title(test_html)
    # Check for dormant or dead instances
    instances = Instance.query.filter(Instance.gone_forever == False, Instance.id != 1).all()
    HEADERS = {'User-Agent': 'PieFed/1.0', 'Accept': 'application/activity+json'}
    for instance in instances:
        try:
            nodeinfo = requests.get(f"https://{instance.domain}/.well-known/nodeinfo", headers=HEADERS,
                                    timeout=5, allow_redirects=True)
            if nodeinfo.status_code == 200:
                nodeinfo_json = nodeinfo.json()
                for links in nodeinfo_json['links']:
                    if 'rel' in links and (links['rel'] == 'http://nodeinfo.diaspora.software/ns/schema/2.0' or
                                           links['rel'] == 'https://nodeinfo.diaspora.software/ns/schema/2.0'):
                        try:
                            sleep(0.1)
                            node = requests.get(links['href'], headers=HEADERS, timeout=5, allow_redirects=True)
                            if node.status_code == 200:
                                node_json = node.json()
                                if 'software' in node_json:
                                    instance.software = node_json['software']['name']
                                    instance.version = node_json['software']['version']
                                    instance.failures = 0
                                    instance.dormant = False
                            elif node.status_code >= 400:
                                instance.failures += 1
                        except:
                            instance.failures += 1
            elif nodeinfo.status_code >= 400:
                instance.failures += 1
        except:
            instance.failures += 1
        if instance.failures > 7 and instance.dormant == True:
            instance.gone_forever = True
        elif instance.failures > 2 and instance.dormant == False:
            instance.dormant = True
    db.session.commit()
    return 'done'
md = "::: spoiler I'm all for ya having fun and your right to hurt yourself.\n\nI am a former racer, commuter, and professional Buyer for a chain of bike shops. I'm also disabled from the crash involving the 6th and 7th cars that have hit me in the last 170k+ miles of riding. I only barely survived what I simplify as a \"broken neck and back.\" Cars making U-turns are what will get you if you ride long enough, \n\nespecially commuting. It will look like just another person turning in front of you, you'll compensate like usual, and before your brain can even register what is really happening, what was your normal escape route will close and you're going to crash really hard. It is the only kind of crash that your intuition is useless against.\n:::"
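The thresholds at the bottom of both loops form a simple liveness ladder: every failed daily check bumps instance.failures, more than two failures mark an instance dormant, and more than seven failures on an already-dormant instance mark it gone_forever, so a host that is unreachable every day goes dormant on day 3 and is written off on day 8. Restated as a small pure function (the helper name is made up for illustration):

# Hypothetical restatement of the dormancy thresholds used in this commit.
def next_state(failures: int, dormant: bool, gone_forever: bool):
    """Apply the daily_maintenance() thresholds after one more failed check."""
    if failures > 7 and dormant:
        gone_forever = True
    elif failures > 2 and not dormant:
        dormant = True
    return dormant, gone_forever

# With one failure per day: day 3 -> dormant, day 8 -> gone_forever.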

View file

@@ -180,7 +180,7 @@ def allowlist_html(html: str) -> str:
    if html is None or html == '':
        return ''
    allowed_tags = ['p', 'strong', 'a', 'ul', 'ol', 'li', 'em', 'blockquote', 'cite', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre',
                    'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead', 'hr', 'span', 'small']
                    'code', 'img', 'details', 'summary', 'table', 'tr', 'td', 'th', 'tbody', 'thead', 'hr', 'span', 'small', 'sub', 'sup']
    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')
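This last hunk only widens the sanitiser's allowlist so subscript and superscript markup in federated posts survives cleaning. For context, a minimal sketch of allowlist-style filtering with BeautifulSoup; it assumes disallowed tags are unwrapped (text kept, markup dropped), which is a simplification and not necessarily the exact behaviour of allowlist_html:

# Minimal allowlist sketch with BeautifulSoup; a simplification, not the project's full sanitiser.
from bs4 import BeautifulSoup

def keep_only(html: str, allowed: set) -> str:
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup.find_all(True):   # every element, in document order
        if tag.name not in allowed:
            tag.unwrap()              # drop the tag itself, keep its children and text
    return str(soup)

print(keep_only('H<sub>2</sub>O and E = mc<sup>2</sup>, <script>alert(1)</script>', {'sub', 'sup'}))
# -> H<sub>2</sub>O and E = mc<sup>2</sup>, alert(1)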