Refactored fetch_post to retry when the random page offset overshoots pagination: it now looks up the last page number and tries again within that range

2023-10-19 21:22:07 -07:00 · 2023-10-19 21:22:07 -07:00 · 22c5a6e231
commit 22c5a6e231
parent 88e11f58bc
1 changed files with 125 additions and 106 deletions
--- a/src/konachan_backend.py
+++ b/src/konachan_backend.py
@ -22,6 +22,8 @@
 import requests
 import os
 import random
+import re
+import time

 def random_tag(*tags):
    return len(tags) == 1 and tags[0].lower() == "random"
@ -43,7 +45,6 @@ def is_banned(post, profile):
 def get_nsfw(post):
    return post["rating"] in ("q", "e")

-
 def select_from_response(response, profile, max_size=None):
    for post in response:
        if is_banned(post, profile):
@ -66,22 +67,16 @@ def select_from_response(response, profile, max_size=None):


 class downloader:
-	username = None
-	password = None
-	max_size = None
-	tmp = None
-	url = ""
-	api_endpoint = "post.json?random=true&limit=100"
-	api_tags = "&tags={}"
-	api_limit = "&limit={}"
-	api_offset = "&page={}"
-	limit = 100
-	max_depth = 200
-
    def __init__(self, backend_credentials):
+        self.api_endpoint = "{}/post/index.json?limit={}&page={}"
+        self.html_endpoint = "{}/post/index?limit={}&page={}"
+        self.tag_url = "&tags={}"
+        self.limit = 100
+        self.retry_limit = 3
+
        self.username = backend_credentials["username"]
        self.password = backend_credentials["password"]
-		self.max_size = backend_credentials["max_size"]
+        self.depth = backend_credentials["max_size"]
        self.tmp = backend_credentials["tmp_dir"]
        self.url = backend_credentials["url"]
        self.max_depth = backend_credentials["max_depth"]
@ -103,16 +98,6 @@ class downloader:

        return post

-	def get_full_url(self, limit=100, offset=0, *tags):
-		search_url = "/".join((self.url, self.api_endpoint))
-		search_url += self.api_limit.format(str(limit))
-		search_url += self.api_offset.format(str(offset))
-		if tags and not random_tag(*tags):
-			search_tags = "+".join(tags)
-			search_url += self.api_tags.format(search_tags)
-		return search_url
-	
-
    def search(self, search_url):
        search_request = None
        if self.username and self.password:
@ -123,26 +108,60 @@ class downloader:
            search_request = requests.get(search_url)
        return search_request

+    # I suck at regex :(
+    def get_max_page(self, html):
+        match = re.findall('page=[0-9]*', html)
+        if match:
+            last_group = match[len(match) - 1]
+            last_page = last_group.rsplit("=", 1)[1]
+            return int(last_page)
+        else:
+            return None

    def fetch_post(self, profile):
-		# Search ratings: s=safe, e=nsfw
        tags = profile["tags"]
+        selected = dict()
+        max_depth = self.max_depth
+        search_url_tags = "+".join(tags)
+        search_url = ""
+
+        for _ in range(0, self.retry_limit):
+            page_offset = random.randint(0, max_depth)
+            search_url = self.api_endpoint.format(self.url, self.limit, page_offset)
+            search_url_html = self.html_endpoint.format(self.url, self.limit, page_offset)
+            if search_url_tags:
+                search_url += self.tag_url.format(search_url_tags)
+                search_url_html += self.tag_url.format(search_url_tags)

-		page_offset = random.randint(0, self.max_depth)
-		search_url = self.get_full_url(self.limit, page_offset, *tags)
            search_request = self.search(search_url)

            if search_request.status_code != 200:
                print("Search request returned:", search_request.status_code)
-			return None
+                continue

            posts = search_request.json()
            random.shuffle(posts)

-		selected = select_from_response(posts, profile, self.max_size)
+            selected = select_from_response(posts, profile)

            if selected is None:
                print("Could not select image based on criteria")
+                time.sleep(2)
+                search_request = self.search(search_url_html)
+
+                if search_request.status_code == 200:
+                    new_max_depth = self.get_max_page(search_request.text)
+                    if new_max_depth < max_depth:
+                        max_depth = new_max_depth
+                    else:
+                        max_depth = max_depth // 2
+                else:
+                    max_depth = max_depth // 2
+                continue
+
+            break
+
+        if not selected:
            return None

        tag_response = collect_tags(selected)