Optimized pagination logic

This commit is contained in:
Anon 2022-12-26 18:55:47 -08:00
parent 8de4584132
commit 9e11c59305

View File

@@ -47,8 +47,10 @@ def get_nsfw(post):
def select_from_response(response, profile, max_size=None):
for post in response:
# Do not select banned tags
if is_banned(post, profile):
continue
# Make sure file_url keyword is in the query
elif "file_url" not in post:
continue
# Select only nsfw
@@ -70,7 +72,7 @@ class downloader:
username = None
password = None
max_size = None
tmp = None
tmp = ""
url = ""
api_endpoint = "index.php?page=dapi&s=post&q=index&json=1"
api_tags = "&tags={}"
@@ -115,14 +117,12 @@ class downloader:
def search(self, search_url):
search_request = None
if self.username and self.password:
search_request = requests.get(search_url,
return requests.get(search_url,
auth=(self.username, self.password)
)
else:
search_request = requests.get(search_url)
return search_request
return requests.get(search_url)
def fetch_post(self, profile):
@@ -130,26 +130,28 @@ class downloader:
# base_url = "https://danbooru.donmai.us/posts.json?random=true&tags={}&rating=e&limit=1"
tags = profile["tags"]
# Query to get number of pages for the tags
search_request = self.search(
self.get_full_url(1,0,*tags)
)
# First query
page_offset = random.randint(0, self.max_depth)
search_url = self.get_full_url(self.limit, page_offset, *tags)
search_request = self.search(search_url)
search_ok = search_request.status_code == 200
if search_request.status_code != 200:
print("Unable to determine number of tag indexes:", search_request.status_code)
# Second query if our page offset is too high
if search_ok and "post" not in search_request.json():
max_posts = int(search_request.json()["@attributes"]["count"])
total_pages = max_posts // self.limit
# There is no point in querying again if max_posts is 0
if max_posts <= 0:
return None
# Wait a second before querying again for the final picture
time.sleep(1)
total_posts = int(search_request.json()["@attributes"]["count"]) - self.limit - 1
index_count = total_posts // self.limit
page_offset = random.randint(0, max(0, min(index_count, self.max_depth)))
page_offset = random.randint(0, total_pages)
search_url = self.get_full_url(self.limit, page_offset, *tags)
time.sleep(1)
search_request = self.search(search_url)
search_ok = search_request.status_code == 200
if search_request.status_code != 200:
if not search_ok:
print("Search request returned:", search_request.status_code)
return None