2 from __future__ import unicode_literals, print_function
7 from future.builtins import str as text
14 from urllib.parse import urlparse
16 from urlparse import urlparse
18 from nhentai.logger import logger
19 from nhentai.parser import request
20 from nhentai.utils import Singleton
# Turn off the warnings urllib3 (as bundled with requests) would otherwise emit.
22 requests.packages.urllib3.disable_warnings()
# Module-level semaphore created with an initial value of 1. download_wrapper() reads its
# value and subprocess_signal() acquires it.
23 semaphore = multiprocessing.Semaphore(1)
# Raised by Downloader.download_ when the image request answers with a status code other
# than 200, and caught there to remove the file that was opened for writing.
26 class NHentaiImageNotExistException(Exception):
# Fetches image URLs and writes them to disk; derives from the project's Singleton helper.
30 class Downloader(Singleton):
# Constructor parameters:
#   path:    presumably the base output directory (download() joins self.path with the
#            target folder) -- confirm.
#   size:    presumably the number of worker processes (download() passes self.size to
#            multiprocessing.Pool) -- confirm.
#   timeout: stored on the instance and forwarded as `timeout` to every request() call.
#   delay:   presumably seconds to wait before each download (download_ sleeps for
#            self.delay) -- confirm.
32 def __init__(self, path='', size=5, timeout=30, delay=0):
35 self.timeout = timeout
# Fetch `url` and save the response body as a file inside `folder`.
#
#   url:      address to fetch; its path basename is the file name when `filename` is empty.
#   folder:   directory the target file name is joined onto.
#   filename: optional file name override.
#   retried:  retry counter; the retry call below passes retried+1.
#
# NOTE(review): several lines of this method (the `try:` openers, branch conditions and most
# return statements) are not visible in the reviewed text. The comments below describe only
# the visible statements. Judging from the retry line and from _download_callback unpacking
# `result, data`, results look like (status, data) pairs -- confirm.
38 def download_(self, url, folder='', filename='', retried=0):
# Sleep for the configured per-download delay.
40 time.sleep(self.delay)
41 logger.info('Starting to download {0} ...'.format(url))
# Fall back to the last component of the URL path when no file name was given.
42 filename = filename if filename else os.path.basename(urlparse(url).path)
43 base_filename, extension = os.path.splitext(filename)
# The target name is the base name left-padded with zeros to at least 3 characters, followed
# by the original extension. An already existing target is reported with a warning.
45 if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
46 logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
# Open the target for binary writing; this creates the file before the request is made.
51 with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
# Streamed GET through the project's request() helper; any status other than 200 is turned
# into NHentaiImageNotExistException.
55 response = request('get', url, stream=True, timeout=self.timeout)
56 if response.status_code != 200:
57 raise NHentaiImageNotExistException
59 except NHentaiImageNotExistException as e:
# Other exceptions from the request attempt: the message is logged at critical level.
62 except Exception as e:
65 logger.critical(str(e))
# Two ways of writing the body are visible: the whole `response.content` at once, or
# iter_content() in 2048-byte chunks. Presumably selected by whether the Content-Length
# header is present -- confirm.
71 length = response.headers.get('content-length')
73 f.write(response.content)
75 for chunk in response.iter_content(2048):
# HTTP errors and timeouts: log a warning and call download_ again with retried+1.
# NOTE(review): the retry's own result is nested as the second element, so the first element
# stays 0 even when the retry succeeds -- confirm this is intended.
78 except (requests.HTTPError, requests.Timeout) as e:
80 logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
81 return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
# Non-200 response: delete the file that open(..., "wb") created above.
85 except NHentaiImageNotExistException as e:
86 os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
# Anything else: print the current call stack and log the message at critical level.
# NOTE(review): traceback.print_stack() prints the stack of the current frame, not the
# traceback of `e`; traceback.print_exc() would show where the exception was raised.
89 except Exception as e:
91 traceback.print_stack()
92 logger.critical(str(e))
# KeyboardInterrupt does not derive from Exception, hence its own clause.
95 except KeyboardInterrupt:
# Log the outcome of one download.
#
#   result: a two-item sequence, unpacked below into a status (`result`) and its payload
#           (`data`); `data` is interpolated into the 404 and success messages.
#
# NOTE(review): the conditions that select each message are not visible in the reviewed
# text -- confirm them against the status values download_ returns.
100 def _download_callback(self, result):
101 result, data = result
103 logger.warning('fatal errors occurred, ignored')
106 logger.warning('url {} return status code 404'.format(data))
108 logger.warning('Ctrl-C pressed, exiting sub processes ...')
110 # workers wont be run, just pass
# 15 is a numeric level between the standard DEBUG (10) and INFO (20) levels.
113 logger.log(15, '{0} downloaded successfully'.format(data))
# Download every URL in `queue` into `folder` using a pool of worker processes.
#
#   queue:  iterable of URLs; rebound below to a list of argument tuples.
#   folder: target directory name; it is joined onto self.path.
#
# NOTE(review): some lines of this method (the handling of a non-text `folder`, the directory
# creation call, and whatever follows the submissions) are not visible in the reviewed text.
115 def download(self, queue, folder=''):
# `text` is the `str` imported from future.builtins.
116 if not isinstance(folder, text):
120 folder = os.path.join(self.path, folder)
# Missing directory: announce that it is being created; an EnvironmentError is logged at
# critical level. An existing directory only produces a warning.
# NOTE(review): logger.warn is a deprecated alias of logger.warning, which the rest of this
# file uses.
122 if not os.path.exists(folder):
123 logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
126 except EnvironmentError as e:
127 logger.critical('{0}'.format(str(e)))
130 logger.warn('Path \'{0}\' already exist.'.format(folder))
# One (downloader, url, folder) tuple per URL: the positional arguments of download_wrapper.
132 queue = [(self, url, folder) for url in queue]
# self.size worker processes, each initialised by init_worker.
134 pool = multiprocessing.Pool(self.size, init_worker)
# Submit one asynchronous task per URL; the comprehension is used only for its side effect.
# NOTE(review): no callback= is passed on this line and the AsyncResult objects are
# discarded, so task results and exceptions are not observed here -- confirm.
135 [pool.apply_async(download_wrapper, args=item) for item in queue]
# Task function submitted to the pool by Downloader.download: runs download_ for one URL.
#
#   obj:    the Downloader instance, passed explicitly as `self` to Downloader.download_.
#   url:    address to download.
#   folder: target directory, forwarded unchanged.
#
# NOTE(review): what happens when the condition below is false is not visible in the
# reviewed text.
141 def download_wrapper(obj, url, folder=''):
# On macOS the `or` short-circuits, so get_value() is never called there (presumably because
# it is unavailable on that platform -- confirm). Elsewhere the download only runs while the
# semaphore's value is non-zero; subprocess_signal acquires the semaphore.
142 if sys.platform == 'darwin' or semaphore.get_value():
143 return Downloader.download_(obj, url=url, folder=folder)
# Install subprocess_signal as the SIGINT handler.
# NOTE(review): presumably the body of init_worker, the initializer Downloader.download
# passes to multiprocessing.Pool -- confirm.
149 signal.signal(signal.SIGINT, subprocess_signal)
# SIGINT handler (installed through signal.signal with this function as the handler).
#
#   signal: the signal number; note that the parameter shadows the `signal` module inside
#           this function.
#   frame:  the stack frame passed to signal handlers.
#
# Raises KeyboardInterrupt.
152 def subprocess_signal(signal, frame):
# Wait at most one second for the module-level semaphore. Presumably this makes only the
# process that obtains it log the message, and drops the value that download_wrapper checks
# to zero -- confirm.
153 if semaphore.acquire(timeout=1):
154 logger.warning('Ctrl-C pressed, exiting sub processes ...')
156 raise KeyboardInterrupt