Optimized pagination logic

2022-12-26 18:55:47 -08:00 · 2022-12-26 18:55:47 -08:00 · 9e11c59305
commit 9e11c59305
parent 8de4584132
1 changed file with 24 additions and 22 deletions
--- a/src/gelbooru_backend.py
+++ b/src/gelbooru_backend.py
@ -47,8 +47,10 @@ def get_nsfw(post):

 def select_from_response(response, profile, max_size=None):
 	for post in response:
+		# Skip posts that is_banned() rejects for this profile (banned tags)
 		if is_banned(post, profile):
 			continue
+		# Skip posts that have no "file_url" key
 		elif "file_url" not in post:
 			continue
 		# Select only nsfw
@ -70,7 +72,7 @@ class downloader:
 	username = None
 	password = None
 	max_size = None
-	tmp = None
+	tmp = ""
 	url = ""
 	api_endpoint = "index.php?page=dapi&s=post&q=index&json=1"
 	api_tags = "&tags={}"
@ -115,14 +117,12 @@ class downloader:
 	

 	def search(self, search_url):
-		search_request = None
 		if self.username and self.password:
-			search_request = requests.get(search_url,
+			return requests.get(search_url,
 					auth=(self.username, self.password)
 					)
 		else:
-			search_request = requests.get(search_url)
-		return search_request
+			return requests.get(search_url)


 	def fetch_post(self, profile):
@ -130,26 +130,28 @@ class downloader:
 		# base_url = "https://danbooru.donmai.us/posts.json?random=true&tags={}&rating=e&limit=1"
 		tags = profile["tags"]

-		# Query to get number of pages for the tags
-		search_request = self.search(
-				self.get_full_url(1,0,*tags)
-				)
-
-		if search_request.status_code != 200:
-			print("Unable to determine number of tag indexes:", search_request.status_code)
-			return None
-
-		# Wait a second before querying again for the final picture
-		time.sleep(1)
-
-		total_posts = int(search_request.json()["@attributes"]["count"]) - self.limit - 1
-		index_count = total_posts // self.limit
-		page_offset = random.randint(0, max(0, min(index_count, self.max_depth)))
+		# First query
+		page_offset = random.randint(0, self.max_depth)
 		search_url = self.get_full_url(self.limit, page_offset, *tags)
-
 		search_request = self.search(search_url)
+		search_ok = search_request.status_code == 200

-		if search_request.status_code != 200:
+		# Second query if our page offset is too high
+		if search_ok and "post" not in search_request.json():
+			max_posts = int(search_request.json()["@attributes"]["count"])
+			total_pages = max_posts // self.limit
+
+			# There is no point in querying again if max_posts is 0
+			if max_posts <= 0:
+				return None
+
+			page_offset = random.randint(0, total_pages)
+			search_url = self.get_full_url(self.limit, page_offset, *tags)
+			time.sleep(1)
+			search_request = self.search(search_url)
+			search_ok = search_request.status_code == 200
+
+		if not search_ok:
 			print("Search request returned:", search_request.status_code)
 			return None