-# coding: utf-8
+# coding: utf-8
+from __future__ import unicode_literals, print_function
+from future.builtins import str as text
import os
import requests
import threadpool
-from urlparse import urlparse
-from logger import logger
+try:
+ from urllib.parse import urlparse
+except ImportError:
+ from urlparse import urlparse
+from nhentai.logger import logger
+from nhentai.parser import request
+from nhentai.utils import Singleton
-class Downloader(object):
- _instance = None
- def __new__(cls, *args, **kwargs):
- if not cls._instance:
- cls._instance = super(Downloader, cls).__new__(cls, *args, **kwargs)
- return cls._instance
+requests.packages.urllib3.disable_warnings()
+
+
class NhentaiImageNotExistException(Exception):
    """Raised when the server answers a non-200 status for an image URL."""
    pass
+
+
+class Downloader(Singleton):
def __init__(self, path='', thread=1, timeout=30):
- if not isinstance(thread, (int, )) or thread < 1 or thread > 10:
+ if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
raise ValueError('Invalid threads count')
self.path = str(path)
self.thread_count = thread
self.threads = []
self.timeout = timeout
- def _download(self, url, folder='', filename='', retried=False):
- logger.info('Start downloading: %s ...' % url)
+ def _download(self, url, folder='', filename='', retried=0):
+ logger.info('Start downloading: {0} ...'.format(url))
filename = filename if filename else os.path.basename(urlparse(url).path)
+ base_filename, extension = os.path.splitext(filename)
try:
- with open(os.path.join(folder, filename), "wb") as f:
- response = requests.get(url, stream=True, timeout=self.timeout)
+ if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
+ logger.warning('File: {0} existed, ignore.'.format(os.path.join(folder, base_filename.zfill(3) +
+ extension)))
+ return 1, url
+
+ with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
+ response = request('get', url, stream=True, timeout=self.timeout)
+ if response.status_code != 200:
+ raise NhentaiImageNotExistException
length = response.headers.get('content-length')
if length is None:
f.write(response.content)
else:
for chunk in response.iter_content(2048):
f.write(chunk)
- except requests.HTTPError as e:
- if not retried:
- logger.error('Error: %s, retrying' % str(e))
- return self._download(url=url, folder=folder, filename=filename, retried=True)
+
+ except (requests.HTTPError, requests.Timeout) as e:
+ if retried < 3:
+ logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
+ return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
else:
- return None
+ return 0, None
+
+ except NhentaiImageNotExistException as e:
+ os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
+ return -1, url
+
except Exception as e:
- logger.critical('CRITICAL: %s' % str(e))
- return None
- return url
+ logger.critical(str(e))
+ return 0, None
+
+ return 1, url
def _download_callback(self, request, result):
- if not result:
- logger.critical('Too many errors occurred, quit.')
- raise SystemExit
- logger.log(15, '%s download successfully' % result)
+ result, data = result
+ if result == 0:
+ logger.warning('fatal errors occurred, ignored')
+ # exit(1)
+ elif result == -1:
+ logger.warning('url {} return status code 404'.format(data))
+ else:
+ logger.log(15, '{0} download successfully'.format(data))
def download(self, queue, folder=''):
- if not isinstance(folder, (str, unicode)):
+ if not isinstance(folder, text):
folder = str(folder)
if self.path:
folder = os.path.join(self.path, folder)
if not os.path.exists(folder):
- logger.warn('Path \'%s\' not exist.' % folder)
+ logger.warn('Path \'{0}\' not exist.'.format(folder))
try:
os.makedirs(folder)
except EnvironmentError as e:
- logger.critical('Error: %s' % str(e))
- raise SystemExit
+ logger.critical('{0}'.format(str(e)))
+ exit(1)
else:
- logger.warn('Path \'%s\' already exist.' % folder)
+ logger.warn('Path \'{0}\' already exist.'.format(folder))
queue = [([url], {'folder': folder}) for url in queue]