2 from __future__ import unicode_literals, print_function
6 from future.builtins import str as text
11 import multiprocessing as mp
14 from urllib.parse import urlparse
16 from urlparse import urlparse
18 from nhentai.logger import logger
19 from nhentai.parser import request
20 from nhentai.utils import Singleton, signal_handler
# Silence the warnings urllib3 would otherwise emit through requests for the
# rest of the process.
22 requests.packages.urllib3.disable_warnings()
# Module-level semaphore, created with the default initial value of 1.
# download_wrapper() checks its value before starting a download and
# subprocess_signal() tries to acquire it when SIGINT arrives.
23 semaphore = mp.Semaphore()
class NHentaiImageNotExistException(Exception):
    """Raised when an image request comes back with a non-200 HTTP status.

    ``Downloader.download_`` raises it right after the request and catches it
    again to delete the file it had already opened for writing.
    """
# Downloader: fetches a list of URLs into a folder, fanning the work out over
# a multiprocessing pool (see download()) and reporting each outcome through
# _download_callback().
# NOTE(review): several statements of this class (branch conditions, returns,
# the retry loop header) are not shown here; comments below describe only the
# visible lines and mark everything else as an assumption to confirm.
30 class Downloader(Singleton):
# Constructor. `thread` must be an int between 1 and 15 inclusive, otherwise
# ValueError('Invalid threads count') is raised. The visible lines store
# `thread` as self.thread_count and `timeout` as self.timeout.
# NOTE(review): self.path and self.delay are read by download() and
# download_() but their assignments are not shown here; presumably they come
# from the `path` and `delay` arguments. Confirm.
32 def __init__(self, path='', thread=1, timeout=30, delay=0):
33 if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
34 raise ValueError('Invalid threads count')
37 self.thread_count = thread
39 self.timeout = timeout
# Download a single URL into `folder`.
#   url      - address to fetch
#   folder   - destination directory
#   filename - target file name; when empty, the basename of the URL path is used
#   retried  - number of retries already performed (incremented on recursion)
# The visible return statement yields a 2-tuple whose first element is a
# status code; _download_callback() unpacks results as (result, data).
42 def download_(self, url, folder='', filename='', retried=0):
# Pause for self.delay seconds before the request.
44 time.sleep(self.delay)
45 logger.info('Starting to download {0} ...'.format(url))
46 filename = filename if filename else os.path.basename(urlparse(url).path)
47 base_filename, extension = os.path.splitext(filename)
# The on-disk name is the base name zero-padded to at least 3 characters plus
# the original extension. An existing file only triggers a warning here; the
# early return that presumably follows is not shown. Confirm.
49 if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
50 logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
# NOTE(review): the target file is opened for writing before the request is
# made, so a failed request can leave an empty file behind. The only visible
# cleanup is the os.remove() in the NHentaiImageNotExistException handler.
55 with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
# Streamed GET using the configured timeout; any status other than 200 is
# turned into NHentaiImageNotExistException.
59 response = request('get', url, stream=True, timeout=self.timeout)
60 if response.status_code != 200:
61 raise NHentaiImageNotExistException
# Handler body not shown; presumably re-raises so the outer handler can clean
# up. Confirm.
63 except NHentaiImageNotExistException as e:
# Any other request failure is logged as critical (surrounding retry logic is
# not shown).
66 except Exception as e:
69 logger.critical(str(e))
# Two write paths are visible: the whole body via response.content, or 2048-byte
# chunks via iter_content(). Presumably the choice depends on whether the
# content-length header is present. Confirm.
75 length = response.headers.get('content-length')
77 f.write(response.content)
79 for chunk in response.iter_content(2048):
# HTTP/timeout errors: warn and retry recursively with retried + 1.
# NOTE(review): this returns (0, <result of the recursive call>), so the
# retry's own (code, data) tuple ends up nested in the data slot and the
# caller sees status 0 even when the retry succeeded. Confirm this is intended.
82 except (requests.HTTPError, requests.Timeout) as e:
84 logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
85 return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
# Non-200 response: delete the file that was opened for writing above.
89 except NHentaiImageNotExistException as e:
90 os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
# Catch-all for remaining errors.
# NOTE(review): traceback.print_stack() prints the current call stack, not the
# traceback of the caught exception; traceback.print_exc() may be what was
# meant. Confirm.
93 except Exception as e:
95 traceback.print_stack()
96 logger.critical(str(e))
# KeyboardInterrupt does not derive from Exception, so it needs its own clause
# even though it comes after `except Exception`.
99 except KeyboardInterrupt:
# Pool callback: receives the (result, data) tuple produced by a download task
# and logs a message chosen by `result`. The branch conditions are not shown;
# the visible messages cover fatal errors, a 404-style failure for `data`,
# Ctrl-C, a silent no-op case, and success.
104 def _download_callback(self, result):
105 result, data = result
107 logger.warning('fatal errors occurred, ignored')
110 logger.warning('url {} return status code 404'.format(data))
112 logger.warning('Ctrl-C pressed, exiting sub processes ...')
114 # workers wont be run, just pass
# Success is logged at numeric level 15, between DEBUG (10) and INFO (20).
117 logger.log(15, '{0} downloaded successfully'.format(data))
# Download every URL in `queue` into `folder` using a process pool.
#   queue  - iterable of URLs
#   folder - destination directory, joined onto self.path
119 def download(self, queue, folder=''):
# `text` is future.builtins.str; the conversion applied to other types is not
# shown here.
120 if not isinstance(folder, text):
124 folder = os.path.join(self.path, folder)
# Make sure the destination exists. The creation call itself is not shown;
# an EnvironmentError raised by it is logged as critical rather than propagated.
# NOTE(review): logger.warn is a deprecated alias of logger.warning on stdlib
# loggers, and the rest of this class uses logger.warning. Confirm what type
# `logger` is.
126 if not os.path.exists(folder):
127 logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
130 except EnvironmentError as e:
131 logger.critical('{0}'.format(str(e)))
134 logger.warn('Path \'{0}\' already exist.'.format(folder))
# One argument tuple per URL: (downloader instance, url, folder), matching the
# signature of download_wrapper().
136 queue = [(self, url, folder) for url in queue]
# self.thread_count worker processes; init_worker runs once in each worker at
# start-up.
138 pool = mp.Pool(self.thread_count, init_worker)
# Schedule each download asynchronously; _download_callback is invoked with
# the task's return value when it completes.
140 pool.apply_async(download_wrapper, args=item, callback=self._download_callback)
# Task entry point submitted to the pool by Downloader.download(): runs
# Downloader.download_ on `obj` for a single URL.
#   obj    - the Downloader instance to use as `self`
#   url    - address to fetch
#   folder - destination directory
146 def download_wrapper(obj, url, folder=''):
# Only start the download while the module-level semaphore still reports a
# non-zero value; subprocess_signal() acquires it when SIGINT is received.
# NOTE(review): the branch taken when the value is zero is not shown here, and
# Semaphore.get_value() is reported to raise NotImplementedError on macOS.
# Confirm both.
147 if semaphore.get_value():
148 return Downloader.download_(obj, url=url, folder=folder)
def init_worker():
    """Install the Ctrl-C handler in a pool worker process.

    ``Downloader.download`` passes this function as the ``initializer`` of
    ``mp.Pool``, so it runs once in every worker and takes no arguments.
    Wrapping the registration in a function also keeps it from running at
    import time, before ``subprocess_signal`` has been defined.
    """
    signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(signal, frame):
    """SIGINT handler: announce the interrupt once, then abort the worker.

    :param signal: signal number delivered to the handler (the name shadows
        the ``signal`` module inside this function, which does not use it)
    :param frame: stack frame that was executing when the signal arrived
    :raises KeyboardInterrupt: always, whether or not the semaphore was taken
    """
    # Whoever manages to take the semaphore within one second logs the
    # message; everyone else stays quiet.
    got_semaphore = semaphore.acquire(timeout=1)
    if got_semaphore:
        logger.warning('Ctrl-C pressed, exiting sub processes ...')
    raise KeyboardInterrupt