2 from __future__ import unicode_literals, print_function
3 from future.builtins import str as text
8 from urllib.parse import urlparse
10 from urlparse import urlparse
12 from nhentai.logger import logger
13 from nhentai.parser import request
14 from nhentai.utils import Singleton
# Turn off warnings issued by the urllib3 package exposed through requests.packages.
17 requests.packages.urllib3.disable_warnings()
# Raised by Downloader._download when an image request returns a status code other than 200.
20 class NhentaiImageNotExistException(Exception):
# Downloads a queue of image URLs into a folder using a threadpool worker pool.
# Derives from the Singleton helper imported from nhentai.utils.
24 class Downloader(Singleton):
# Constructor.
#   path:    base directory; download() joins the target folder onto self.path.
#            NOTE(review): presumably stored on the instance here - confirm.
#   thread:  worker count; must be an int in the range 1..15 inclusive.
#   timeout: stored and passed as the timeout of every image request.
# Raises ValueError('Invalid threads count') when `thread` fails validation.
26 def __init__(self, path='', thread=1, timeout=30):
27 if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
28 raise ValueError('Invalid threads count')
# Worker count later handed to threadpool.ThreadPool in download().
30 self.thread_count = thread
32 self.timeout = timeout
# Worker routine: fetch `url` and write the response body to a file inside `folder`.
#   filename: name to save under; when empty, the last component of the URL path is used.
#   retried:  retry counter, incremented on each recursive retry.
# The saved name is the file stem left-padded with zeros to at least 3 characters,
# followed by the original extension.
# NOTE(review): results appear to be (status, data) tuples consumed by
# _download_callback - confirm.
34 def _download(self, url, folder='', filename='', retried=0):
35 logger.info('Start downloading: {0} ...'.format(url))
# Fall back to the basename of the URL path when no filename is supplied.
36 filename = filename if filename else os.path.basename(urlparse(url).path)
37 base_filename, extension = os.path.splitext(filename)
# Warn when the zero-padded target file is already present on disk.
39 if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
40 logger.warning('File: {0} existed, ignore.'.format(os.path.join(folder, base_filename.zfill(3) +
# The target file is opened for binary writing before the request is issued, so a
# failed request leaves a file behind; the NhentaiImageNotExistException handler
# below removes it again.
44 with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
# Request the image with streaming enabled and the configured timeout.
45 response = request('get', url, stream=True, timeout=self.timeout)
# Any status other than 200 is reported as a missing image.
46 if response.status_code != 200:
47 raise NhentaiImageNotExistException
# Two write paths follow: the whole body via response.content, or 2048-byte
# chunks via iter_content.
# NOTE(review): presumably selected by whether a content-length header is present - confirm.
48 length = response.headers.get('content-length')
50 f.write(response.content)
52 for chunk in response.iter_content(2048):
# HTTP errors and timeouts raised by requests: log a warning and retry with retried + 1.
55 except (requests.HTTPError, requests.Timeout) as e:
57 logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
# NOTE(review): the retry's own return value is nested as the second tuple element
# and the status stays 0 even if the retry succeeds - confirm callers expect this.
58 return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
# Non-200 response: delete the file that open() created above.
62 except NhentaiImageNotExistException as e:
63 os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
# Any other exception is logged at critical level.
66 except Exception as e:
67 logger.critical(str(e))
# Callback handed to threadpool.makeRequests in download(); logs one of three
# outcomes for a finished request.
# NOTE(review): the `request` parameter shadows the `request` helper imported from
# nhentai.parser inside this method.
72 def _download_callback(self, request, result):
75 logger.warning('fatal errors occurred, ignored')
78 logger.warning('url {} return status code 404'.format(data))
# Numeric level 15 sits between the standard DEBUG (10) and INFO (20) levels.
80 logger.log(15, '{0} download successfully'.format(data))
# Download every URL in `queue` into `folder` with a pool of self.thread_count
# workers, then wait on the pool.
#   queue:  iterable of image URLs.
#   folder: destination folder, joined onto self.path.
82 def download(self, queue, folder=''):
# `text` is future.builtins.str; this checks whether `folder` is a text string.
83 if not isinstance(folder, text):
# Place the destination under the downloader's base path.
87 folder = os.path.join(self.path, folder)
# Log whether the destination exists; an EnvironmentError raised in this branch is
# logged at critical level.
# NOTE(review): if `logger` is a standard logging.Logger, .warn is a deprecated
# alias of .warning, which the rest of this class uses.
89 if not os.path.exists(folder):
90 logger.warn('Path \'{0}\' not exist.'.format(folder))
93 except EnvironmentError as e:
94 logger.critical('{0}'.format(str(e)))
97 logger.warn('Path \'{0}\' already exist.'.format(folder))
# Rewrap each URL as ([url], {'folder': folder}): the positional arguments and
# keyword arguments for one _download call.
99 queue = [([url], {'folder': folder}) for url in queue]
101 self.thread_pool = threadpool.ThreadPool(self.thread_count)
102 requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
# The list comprehension is used only for its side effect of enqueueing each
# request; the resulting list is discarded.
103 [self.thread_pool.putRequest(req) for req in requests_]
# NOTE(review): wait() presumably blocks until all queued requests have finished -
# confirm against the threadpool documentation.
105 self.thread_pool.wait()