Optimized pagination logic

Anon 2022-12-26 18:55:47 -08:00
parent 8de4584132
commit 9e11c59305


@@ -47,8 +47,10 @@ def get_nsfw(post):
 def select_from_response(response, profile, max_size=None):
     for post in response:
+        # Do not select banned tags
         if is_banned(post, profile):
             continue
+        # Make sure file_url keyword is in the query
        elif "file_url" not in post:
             continue
         # Select only nsfw
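The loop this hunk annotates is the post filter that runs over each API response. A minimal runnable sketch of that pass, with stand-in is_banned and get_nsfw helpers and an assumed nsfw-only continuation (the real helpers live elsewhere in this file, and the hunk cuts off at the "# Select only nsfw" comment):

def is_banned(post, profile):
    # Stand-in: reject posts whose tag string overlaps the profile's banned list
    return bool(set(post.get("tags", "").split()) & set(profile.get("banned_tags", [])))

def get_nsfw(post):
    # Stand-in: treat anything not rated safe/general as nsfw
    return post.get("rating") not in ("safe", "general")

def select_from_response(response, profile, max_size=None):
    for post in response:
        # Do not select banned tags
        if is_banned(post, profile):
            continue
        # Make sure file_url keyword is in the query
        elif "file_url" not in post:
            continue
        # Select only nsfw (assumed continuation; max_size filtering elided)
        if not get_nsfw(post):
            continue
        return post
    return None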
@@ -70,7 +72,7 @@ class downloader:
     username = None
     password = None
     max_size = None
-    tmp = None
+    tmp = ""
     url = ""
     api_endpoint = "index.php?page=dapi&s=post&q=index&json=1"
     api_tags = "&tags={}"
@@ -115,14 +117,12 @@ class downloader:
     def search(self, search_url):
-        search_request = None
         if self.username and self.password:
-            search_request = requests.get(search_url,
+            return requests.get(search_url,
                 auth=(self.username, self.password)
             )
         else:
-            search_request = requests.get(search_url)
-        return search_request
+            return requests.get(search_url)

     def fetch_post(self, profile):
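After this hunk, search returns the requests response directly instead of staging it in a temporary variable; behavior is unchanged, since both branches always produced a response. The resulting method in isolation (requests and the credential attributes come from the surrounding class):

import requests

class downloader:
    username = None
    password = None

    def search(self, search_url):
        if self.username and self.password:
            # HTTP Basic auth when credentials are configured
            return requests.get(search_url,
                auth=(self.username, self.password)
            )
        else:
            return requests.get(search_url)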
@@ -130,26 +130,28 @@ class downloader:
         # base_url = "https://danbooru.donmai.us/posts.json?random=true&tags={}&rating=e&limit=1"
         tags = profile["tags"]
-        # Query to get number of pages for the tags
-        search_request = self.search(
-            self.get_full_url(1,0,*tags)
-        )
-        if search_request.status_code != 200:
-            print("Unable to determine number of tag indexes:", search_request.status_code)
-            return None
-        # Wait a second before querying again for the final picture
-        time.sleep(1)
-        total_posts = int(search_request.json()["@attributes"]["count"]) - self.limit - 1
-        index_count = total_posts // self.limit
-        page_offset = random.randint(0, max(0, min(index_count, self.max_depth)))
+        # First query
+        page_offset = random.randint(0, self.max_depth)
         search_url = self.get_full_url(self.limit, page_offset, *tags)
         search_request = self.search(search_url)
-        if search_request.status_code != 200:
+        search_ok = search_request.status_code == 200
+        # Second query if our page offset is too high
+        if search_ok and "post" not in search_request.json():
+            max_posts = int(search_request.json()["@attributes"]["count"])
+            total_pages = max_posts // self.limit
+            # There is no point in querying again if max_posts is 0
+            if max_posts <= 0:
+                return None
+            page_offset = random.randint(0, total_pages)
+            search_url = self.get_full_url(self.limit, page_offset, *tags)
+            time.sleep(1)
+            search_request = self.search(search_url)
+            search_ok = search_request.status_code == 200
+        if not search_ok:
             print("Search request returned:", search_request.status_code)
             return None
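End to end, the optimization trades the old unconditional count-query-then-fetch (two requests every time) for a single request in the common case, falling back to one bounded retry only when the random offset lands past the tag's last page; the API signals that overshoot with a 200 response carrying an "@attributes" count but no "post" key. For example, with limit=100 and a 250-post tag, total_pages = 250 // 100 = 2, so the retry draws an offset from 0 to 2. A self-contained sketch of the new flow, with illustrative limit/max_depth values and a stand-in URL builder (the success path after the shown lines is assumed):

import random
import time
import requests

class downloader:
    # Values are illustrative; the real class defines these elsewhere.
    limit = 100
    max_depth = 200
    url = "https://example.booru/"  # hypothetical endpoint

    def get_full_url(self, limit, page, *tags):
        # Stand-in for the real URL builder in this file.
        return (self.url + "index.php?page=dapi&s=post&q=index&json=1"
                + "&limit={}&pid={}&tags={}".format(limit, page, "+".join(tags)))

    def search(self, search_url):
        return requests.get(search_url)

    def fetch_post(self, profile):
        tags = profile["tags"]
        # First query: gamble on a random offset up to max_depth
        page_offset = random.randint(0, self.max_depth)
        search_request = self.search(self.get_full_url(self.limit, page_offset, *tags))
        search_ok = search_request.status_code == 200
        # Second query if the offset overshot: 200 status, count present,
        # but no "post" key in the payload
        if search_ok and "post" not in search_request.json():
            max_posts = int(search_request.json()["@attributes"]["count"])
            total_pages = max_posts // self.limit
            # There is no point in querying again if max_posts is 0
            if max_posts <= 0:
                return None
            page_offset = random.randint(0, total_pages)
            time.sleep(1)  # be polite between queries
            search_request = self.search(self.get_full_url(self.limit, page_offset, *tags))
            search_ok = search_request.status_code == 200
        if not search_ok:
            print("Search request returned:", search_request.status_code)
            return None
        # The success path is outside the shown hunk; returning the parsed
        # post list is an assumption for this sketch.
        return search_request.json().get("post")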