Changed whitespace and added additional debug and wait information in konachan backend
This commit is contained in:
parent
76b008a499
commit
02f454d32d
@ -26,161 +26,163 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
def random_tag(*tags):
    """Return True when the tag list is exactly the single tag "random"."""
    if len(tags) != 1:
        return False
    return tags[0].lower() == "random"
|
|
||||||
|
|
||||||
def collect_tags(post):
    """Return the post's whitespace-separated tag string as a list of
    lower-cased tag tokens."""
    tag_field = post["tags"].strip()
    return tag_field.lower().split()
|
|
||||||
|
|
||||||
def is_banned(post, profile):
    """Return the first profile-banned tag present on the post, else None."""
    post_tags = set(collect_tags(post))
    # Iterate the ban list in order so the first configured match wins,
    # exactly as the original loop did.
    return next((tag for tag in profile["banned_tags"] if tag in post_tags), None)
|
|
||||||
|
|
||||||
def get_nsfw(post):
    """Report whether the post is rated questionable ("q") or explicit ("e")."""
    rating = post["rating"]
    return rating == "q" or rating == "e"
|
|
||||||
def select_from_response(response, profile, max_size=None):
    """Pick the first post in *response* that satisfies *profile*.

    A post is rejected when it carries a banned tag, lacks a "file_url",
    fails an explicit SFW/NSFW requirement, or reports a server-side
    "file_size" larger than *max_size* (bytes).  Returns the first
    acceptable post dict, or None when nothing qualifies.
    """
    for post in response:
        # Skip posts carrying any tag the profile bans.
        if is_banned(post, profile):
            continue
        # A post without a downloadable file is useless to us.
        if "file_url" not in post:
            continue
        # Select only nsfw (or only sfw) when the profile forces it.
        if (profile["force_nsfw"] is not None and
                profile["force_nsfw"] != get_nsfw(post)):
            continue
        # Make sure serverside size is not larger than max_size.
        # (was `max_size != None`; `is not None` is the Python idiom)
        if (max_size is not None and
                "file_size" in post and
                post["file_size"] > max_size):
            continue
        return post
    return None
|
|
||||||
|
|
||||||
class downloader:
    """Random image fetcher for Moebooru-style ("konachan") boards.

    Gambles on a random results page, filters posts through the caller's
    profile, and downloads the chosen image into a temp directory.
    """

    def __init__(self, backend_credentials):
        """Cache endpoint templates and the caller-supplied credentials.

        backend_credentials must provide the keys: "username",
        "password", "max_size", "tmp_dir", "url" and "max_depth".
        """
        # JSON search API: {base}/post/index.json?limit={n}&page={p}
        self.api_endpoint = "{}/post/index.json?limit={}&page={}"
        # Same search rendered as HTML; scraped to learn the real page count.
        self.html_endpoint = "{}/post/index?limit={}&page={}"
        self.tag_url = "&tags={}"
        self.limit = 100        # posts requested per page
        self.retry_limit = 3    # search attempts before giving up

        self.username = backend_credentials["username"]
        self.password = backend_credentials["password"]
        # NOTE(review): populated from "max_size" yet named depth — confirm intent.
        self.depth = backend_credentials["max_size"]
        self.tmp = backend_credentials["tmp_dir"]
        self.url = backend_credentials["url"]
        self.max_depth = backend_credentials["max_depth"]
        random.seed(os.urandom(16))

    def download_post(self, post):
        """Download post["file_url"] to post["full_path"].

        Returns the post on success, or None when the HTTP GET fails.
        """
        file_url = post["file_url"]
        full_path = post["full_path"]

        remote_image = requests.get(file_url)

        if remote_image.status_code != 200:
            print("Remote image request returned:", remote_image.status_code)
            return None

        with open(full_path, "wb") as f:
            f.write(remote_image.content)

        return post

    def search(self, search_url):
        """GET *search_url*, with HTTP basic auth when credentials are set.

        Returns the requests.Response object unexamined.
        """
        search_request = None
        if self.username and self.password:
            search_request = requests.get(search_url,
                auth=(self.username, self.password)
            )
        else:
            search_request = requests.get(search_url)
        return search_request

    # I suck at regex :(
    def get_max_page(self, html):
        """Extract the last "page=N" pagination link found in *html*.

        Returns N as an int, or None when no pagination link is present.
        """
        match = re.findall('page=[0-9]*', html)
        if match:
            last_group = match[len(match) - 1]
            last_page = last_group.rsplit("=", 1)[1]
            return int(last_page)
        else:
            return None

    def fetch_post(self, profile):
        """Search for a random post matching *profile* and describe it.

        Retries up to self.retry_limit times, shrinking the page-depth
        window whenever a page yields nothing usable.  Returns a dict
        holding the chosen post's URLs, local target path, tags and NSFW
        flag, or None when every attempt failed.
        """
        tags = profile["tags"]
        selected = dict()
        max_depth = self.max_depth
        search_url_tags = "+".join(tags)
        search_url = ""

        for _ in range(0, self.retry_limit):
            # Gamble on a random page within the believed pagination depth.
            page_offset = random.randint(0, max_depth)
            search_url = self.api_endpoint.format(self.url, self.limit, page_offset)
            search_url_html = self.html_endpoint.format(self.url, self.limit, page_offset)
            if search_url_tags:
                search_url += self.tag_url.format(search_url_tags)
                search_url_html += self.tag_url.format(search_url_tags)

            search_request = self.search(search_url)

            if search_request.status_code != 200:
                print("Search {} request returned: {}".format(search_url, search_request.status_code))
                continue

            posts = search_request.json()
            random.shuffle(posts)

            selected = select_from_response(posts, profile)

            if selected is None:
                # Nothing on this page qualified; consult the HTML listing
                # to learn the true pagination depth before retrying.
                print("Could not select image based on criteria", search_url)
                time.sleep(2)
                search_request = self.search(search_url_html)

                if search_request.status_code == 200:
                    new_max_depth = self.get_max_page(search_request.text)
                    # BUGFIX: get_max_page may return None, and
                    # `None < int` raises TypeError on Python 3 —
                    # treat a missing page count as "halve the depth".
                    if new_max_depth is not None and new_max_depth < max_depth:
                        max_depth = new_max_depth
                    else:
                        max_depth = max_depth // 2
                else:
                    max_depth = max_depth // 2
                # Wait a little extra so we do not hammer the server.
                time.sleep(2)
                continue

            break

        if not selected:
            print("Error searching:", search_url_tags)
            return None

        tag_response = collect_tags(selected)
        nsfw = get_nsfw(selected)
        file_url = selected["file_url"]

        basename = "{}.{}".format(selected["md5"], file_url.rsplit(".", 1)[1])
        full_path = os.path.join(self.tmp, basename)

        r = {
            # Add profile to dictionary
            "profile": profile,

            # Query results
            "search_url": search_url,
            "file_url": file_url,
            "full_path": full_path,
            "tag_response": tag_response,
            "nsfw": nsfw
        }

        return r
||||||
|
Reference in New Issue
Block a user