6 from future.builtins import str as text
13 from urllib.parse import urlparse
15 from urlparse import urlparse
17 from nhentai import constant
18 from nhentai.logger import logger
19 from nhentai.parser import request
20 from nhentai.utils import Singleton
# Silence urllib3's warning output (e.g. certificate warnings) raised through requests.
requests.packages.urllib3.disable_warnings()

# One-slot semaphore shared with pool worker processes; the SIGINT handler
# acquires it (and keeps it) to signal shutdown — see subprocess_signal and
# download_wrapper.
semaphore = multiprocessing.Semaphore(1)
26 class NHentaiImageNotExistException(Exception):
class Downloader(Singleton):
    """Image downloader that fans URLs out to a multiprocessing pool.

    NOTE(review): this view of the file is partial — several source lines are
    elided, so some enclosing control flow (try headers, if/elif chains,
    retry loops) is not visible below. Comments describe only visible code;
    anything marked TODO must be confirmed against the full source.
    """

    def __init__(self, path='', size=5, timeout=30, delay=0):
        # Per-request timeout in seconds. `path`, `size` and `delay` are
        # presumably stored on elided lines — TODO confirm in full source.
        self.timeout = timeout

    def download_(self, url, folder='', filename='', retried=0, proxy=None):
        # Download a single image `url` into `folder`. Returns a
        # (status, data) tuple consumed by _download_callback.
        time.sleep(self.delay)  # politeness throttle between requests
        logger.info('Starting to download {0} ...'.format(url))
        # Fall back to the URL's basename when no filename was supplied.
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        # On disk, page numbers are zero-padded to three digits.
        if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
            logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
        with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
            response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
            if response.status_code != 200:
                # Any non-200 status is treated as a missing image.
                raise NHentaiImageNotExistException
        except NHentaiImageNotExistException as e:
        except Exception as e:
            logger.critical(str(e))
        # Write the body to disk; the content-length header is consulted
        # (branch between whole-body write and chunked write is elided).
        length = response.headers.get('content-length')
        f.write(response.content)
        for chunk in response.iter_content(2048):
        except (requests.HTTPError, requests.Timeout) as e:
            # Network failure: retry by recursing with an incremented
            # counter (the retry-limit check is on an elided line — TODO confirm).
            logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
            return 0, self.download_(url=url, folder=folder, filename=filename,
                                     retried=retried+1, proxy=proxy)
        except NHentaiImageNotExistException as e:
            # Remove the partially created file for a missing image.
            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
        except Exception as e:
            traceback.print_stack()
            logger.critical(str(e))
        except KeyboardInterrupt:

    def _download_callback(self, result):
        # Pool completion callback: unpack the (status, data) tuple produced
        # by download_ and log the outcome. The status-code branching
        # (if/elif on the unpacked value) is on elided lines.
        result, data = result
        logger.warning('fatal errors occurred, ignored')
        logger.warning('url {} return status code 404'.format(data))
        logger.warning('Ctrl-C pressed, exiting sub processes ...')
        # workers won't be run, just pass
        logger.log(15, '{0} downloaded successfully'.format(data))

    def download(self, queue, folder=''):
        # Ensure the destination folder exists, then dispatch every URL in
        # `queue` to a worker pool.
        # `text` is the future/builtins str alias imported at the top of the
        # file (py2/py3 compatibility); the branch body is elided.
        if not isinstance(folder, text):
        # Destination is resolved relative to the configured base path.
        folder = os.path.join(self.path, folder)
        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))
        logger.warning('Path \'{0}\' already exist.'.format(folder))
        # One task tuple per URL, carrying self and the configured proxy so
        # the module-level download_wrapper can be pickled for the pool.
        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
        pool = multiprocessing.Pool(self.size, init_worker)
        [pool.apply_async(download_wrapper, args=item) for item in queue]
def download_wrapper(obj, url, folder='', proxy=None):
    # Module-level trampoline so the multiprocessing pool can pickle the
    # task: invokes the unbound Downloader.download_ with `obj` as self.
    # The shared semaphore doubles as a shutdown flag — once a SIGINT handler
    # has acquired it, get_value() is 0 and new tasks are skipped.
    # NOTE(review): macOS bypasses the semaphore check (presumably because
    # sem_getvalue is unavailable there) — TODO confirm against full source.
    if sys.platform == 'darwin' or semaphore.get_value():
        return Downloader.download_(obj, url=url, folder=folder, proxy=proxy)
150 signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(signal, frame):
    # SIGINT handler installed in pool workers. Acquiring (and never
    # releasing) the shared one-slot semaphore marks the pool as shutting
    # down, so download_wrapper stops starting new tasks; the timeout keeps
    # later workers from blocking once the slot is taken.
    # NOTE(review): the parameter name `signal` shadows the stdlib `signal`
    # module inside this function — harmless here, but worth renaming.
    if semaphore.acquire(timeout=1):
        logger.warning('Ctrl-C pressed, exiting sub processes ...')

    raise KeyboardInterrupt