Optimized pagination logic
This commit is contained in:
parent
8de4584132
commit
9e11c59305
@ -47,8 +47,10 @@ def get_nsfw(post):
|
||||
|
||||
def select_from_response(response, profile, max_size=None):
|
||||
for post in response:
|
||||
# Do not select banned tags
|
||||
if is_banned(post, profile):
|
||||
continue
|
||||
# Skip posts that do not contain a "file_url" key
|
||||
elif "file_url" not in post:
|
||||
continue
|
||||
# Select only nsfw
|
||||
@ -70,7 +72,7 @@ class downloader:
|
||||
username = None
|
||||
password = None
|
||||
max_size = None
|
||||
tmp = None
|
||||
tmp = ""
|
||||
url = ""
|
||||
api_endpoint = "index.php?page=dapi&s=post&q=index&json=1"
|
||||
api_tags = "&tags={}"
|
||||
@ -115,14 +117,12 @@ class downloader:
|
||||
|
||||
|
||||
def search(self, search_url):
|
||||
search_request = None
|
||||
if self.username and self.password:
|
||||
search_request = requests.get(search_url,
|
||||
return requests.get(search_url,
|
||||
auth=(self.username, self.password)
|
||||
)
|
||||
else:
|
||||
search_request = requests.get(search_url)
|
||||
return search_request
|
||||
return requests.get(search_url)
|
||||
|
||||
|
||||
def fetch_post(self, profile):
|
||||
@ -130,26 +130,28 @@ class downloader:
|
||||
# base_url = "https://danbooru.donmai.us/posts.json?random=true&tags={}&rating=e&limit=1"
|
||||
tags = profile["tags"]
|
||||
|
||||
# Query to get number of pages for the tags
|
||||
search_request = self.search(
|
||||
self.get_full_url(1,0,*tags)
|
||||
)
|
||||
|
||||
if search_request.status_code != 200:
|
||||
print("Unable to determine number of tag indexes:", search_request.status_code)
|
||||
return None
|
||||
|
||||
# Wait a second before querying again for the final picture
|
||||
time.sleep(1)
|
||||
|
||||
total_posts = int(search_request.json()["@attributes"]["count"]) - self.limit - 1
|
||||
index_count = total_posts // self.limit
|
||||
page_offset = random.randint(0, max(0, min(index_count, self.max_depth)))
|
||||
# First query
|
||||
page_offset = random.randint(0, self.max_depth)
|
||||
search_url = self.get_full_url(self.limit, page_offset, *tags)
|
||||
|
||||
search_request = self.search(search_url)
|
||||
search_ok = search_request.status_code == 200
|
||||
|
||||
if search_request.status_code != 200:
|
||||
# Second query if our page offset is too high
|
||||
if search_ok and "post" not in search_request.json():
|
||||
max_posts = int(search_request.json()["@attributes"]["count"])
|
||||
total_pages = max_posts // self.limit
|
||||
|
||||
# There is no point in querying again if max_posts is 0
|
||||
if max_posts <= 0:
|
||||
return None
|
||||
|
||||
page_offset = random.randint(0, total_pages)
|
||||
search_url = self.get_full_url(self.limit, page_offset, *tags)
|
||||
time.sleep(1)
|
||||
search_request = self.search(search_url)
|
||||
search_ok = search_request.status_code == 200
|
||||
|
||||
if not search_ok:
|
||||
print("Search request returned:", search_request.status_code)
|
||||
return None
|
||||
|
||||
|
Reference in New Issue
Block a user