6 from future.builtins import str as text
13 from urllib.parse import urlparse
15 from urlparse import urlparse
17 from nhentai import constant
18 from nhentai.logger import logger
19 from nhentai.parser import request
20 from nhentai.utils import Singleton
# Silence urllib3's InsecureRequestWarning noise for unverified HTTPS requests.
requests.packages.urllib3.disable_warnings()
# Inter-process semaphore used to coordinate Ctrl-C shutdown of pool workers:
# the first worker to acquire it logs the shutdown message exactly once.
semaphore = multiprocessing.Semaphore(1)
26 class NHentaiImageNotExistException(Exception):
class Downloader(Singleton):
    """Multi-process image downloader.

    ``download()`` fans a queue of image URLs out to a process pool;
    ``download_()`` fetches a single URL into a folder and returns a
    ``(status, data)`` tuple that ``_download_callback`` reports on:
    1 = success / already present, 0 = fatal error, -1 = image missing,
    -3 = interrupted by Ctrl-C.
    """

    def __init__(self, path='', size=5, timeout=30, delay=0):
        # path:    base directory per-gallery folders are created under
        # size:    number of worker processes in the pool
        # timeout: per-request timeout in seconds
        # delay:   optional pause (seconds) before each request, for rate limiting
        self.path = str(path)
        self.size = size
        self.timeout = timeout
        self.delay = delay

    def download_(self, url, folder='', filename='', retried=0, proxy=None):
        """Download one image from *url* into *folder*.

        Returns (1, url) on success or when the file already exists,
        (0, ...) on failure/retry exhaustion, (-1, url) when the remote
        image does not exist, and (-3, None) on KeyboardInterrupt.
        """
        if self.delay:
            time.sleep(self.delay)
        logger.info('Starting to download {0} ...'.format(url))

        # Default the file name to the last path segment of the URL and
        # zero-pad the stem to three digits so pages sort naturally on disk.
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        # Hoisted: the original rebuilt this join expression at every use site.
        save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)

        if os.path.exists(save_file_path):
            logger.warning('File: {0} exists, ignoring'.format(save_file_path))
            return 1, url

        try:
            response = None
            with open(save_file_path, "wb") as f:
                # Retry the HTTP request itself up to 10 times for generic
                # errors; a non-200 status means the image does not exist.
                attempt = 0
                while attempt < 10:
                    try:
                        response = request('get', url, stream=True,
                                           timeout=self.timeout, proxies=proxy)
                        if response.status_code != 200:
                            raise NHentaiImageNotExistException
                    except NHentaiImageNotExistException:
                        raise  # let the outer handler clean up the file
                    except Exception as e:
                        attempt += 1
                        if attempt >= 10:
                            logger.critical(str(e))
                            return 0, None
                        continue
                    break

                # Stream the body to disk in chunks; fall back to one bulk
                # write when the server does not advertise a content length.
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)

        except (requests.HTTPError, requests.Timeout) as e:
            # Transient HTTP failure: retry the whole download up to 3 times.
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                return 0, self.download_(url=url, folder=folder, filename=filename,
                                         retried=retried + 1, proxy=proxy)
            return 0, None

        except NHentaiImageNotExistException:
            # The open() above created an empty file -- remove it again.
            os.remove(save_file_path)
            return -1, url

        except Exception as e:
            import traceback  # local import: only needed on this error path
            traceback.print_stack()
            logger.critical(str(e))
            return 0, None

        except KeyboardInterrupt:
            # KeyboardInterrupt derives from BaseException, so the generic
            # Exception handler above does not swallow it.
            return -3, None

        return 1, url

    def _download_callback(self, result):
        """Log the outcome of one download_() call (a (status, data) tuple)."""
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
        elif result == -2:
            logger.warning('Ctrl-C pressed, exiting sub processes ...')
        elif result == -3:
            # workers wont be run, just pass
            pass
        else:
            logger.log(15, '{0} downloaded successfully'.format(data))

    def download(self, queue, folder='', regenerate_cbz=False):
        """Download every URL in *queue* into *folder* via a process pool.

        The whole request is skipped when a matching ``<folder>.cbz``
        archive already exists, unless *regenerate_cbz* is set.
        """
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if os.path.exists(folder + '.cbz'):
            if not regenerate_cbz:
                logger.warning('CBZ file \'{}.cbz\' exists, ignored download request'.format(folder))
                return

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))
        else:
            logger.warning('Path \'{0}\' already exist.'.format(folder))

        # Each work item carries the downloader instance so the module-level
        # wrapper can invoke the unbound method inside the child process.
        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

        pool = multiprocessing.Pool(self.size, init_worker)
        [pool.apply_async(download_wrapper, args=item) for item in queue]

        pool.close()
        pool.join()
def download_wrapper(obj, url, folder='', proxy=None):
    """Process-pool entry point: run one download unless shutdown is underway.

    On non-macOS platforms a drained semaphore means Ctrl-C was pressed,
    so report (-3, None) instead of starting a new download.
    """
    if sys.platform == 'darwin' or semaphore.get_value():
        return Downloader.download_(obj, url=url, folder=folder, proxy=proxy)
    # Fix: previously fell through and implicitly returned None, which broke
    # the `result, data = result` unpacking in Downloader._download_callback.
    return -3, None
155 signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(signum, frame):
    """SIGINT handler installed in each pool worker.

    The first worker to acquire the shared semaphore logs the shutdown
    message exactly once; every worker then aborts via KeyboardInterrupt.
    Parameters follow the standard handler(signum, frame) signature (the
    interpreter calls handlers positionally); the first parameter was
    renamed from 'signal', which shadowed the signal module.
    """
    if semaphore.acquire(timeout=1):
        logger.warning('Ctrl-C pressed, exiting sub processes ...')

    raise KeyboardInterrupt