From ea224e291b2b143f304e1e96e5ff2a37cd6d351d Mon Sep 17 00:00:00 2001 From: rimu <3310831+rimu@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:12:19 +1300 Subject: [PATCH] better source for pytesseract will result in more reliable ocr --- app/activitypub/util.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/app/activitypub/util.py b/app/activitypub/util.py index 4826b3b1..9a9881ea 100644 --- a/app/activitypub/util.py +++ b/app/activitypub/util.py @@ -554,7 +554,7 @@ def make_image_sizes_async(file_id, thumbnail_width, medium_width, directory): # Alert regarding fascist meme content try: - image_text = pytesseract.image_to_string(Image.open(final_place).convert('L')) + image_text = pytesseract.image_to_string(Image.open(BytesIO(source_image)).convert('L')) except FileNotFoundError as e: image_text = '' if 'Anonymous' in image_text and ('No.' in image_text or ' N0' in image_text): # chan posts usually contain the text 'Anonymous' and ' No.12345' @@ -1164,6 +1164,22 @@ def undo_vote(activity_log, comment, post, target_ap_id, user): return post +# given an activitypub id for a post or comment, retrieve it and all its parent objects +def backfill_from_ap_id(ap_id: str): + if ap_id.startswith(f"https://{current_app.config['SERVER_NAME']}"): + ... + else: + try: + activity_data = get_request(ap_id, headers={'Accept': 'application/activity+json'}) + except requests.exceptions.ReadTimeout: + time.sleep(randint(3, 10)) + activity_data = get_request(ap_id, headers={'Accept': 'application/activity+json'}) + if activity_data.status_code == 200: + actor_json = activity_data.json() + activity_data.close() + return actor_json_to_model(actor_json, address, server) + + def lemmy_site_data(): site = g.site data = {