git.lizzy.rs Git - nhentai.git/blobdiff - nhentai/downloader.py
Merge pull request #21 from mentaterasmus/master
[nhentai.git] / nhentai / downloader.py
index 09c1a5c99cf1c11490934b4c5f3aa716e062b7b3..27b27696982b9c3340f9b26a7481666e9963ca9b 100644 (file)
-# coding: utf-8
+# coding: utf-
+from __future__ import unicode_literals, print_function
+from future.builtins import str as text
 import os
 import requests
 import threadpool
-from urlparse import urlparse
-from logger import logger
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
 
+from nhentai.logger import logger
+from nhentai.parser import request
+from nhentai.utils import Singleton
 
-class Downloader(object):
-    _instance = None
 
-    def __new__(cls, *args, **kwargs):
-        if not cls._instance:
-            cls._instance = super(Downloader, cls).__new__(cls, *args, **kwargs)
-        return cls._instance
+requests.packages.urllib3.disable_warnings()
+
+
+class NhentaiImageNotExistException(Exception):
+    pass
+
+
+class Downloader(Singleton):
 
     def __init__(self, path='', thread=1, timeout=30):
-        if not isinstance(thread, (int, )) or thread < 1 or thread > 10:
+        if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
             raise ValueError('Invalid threads count')
         self.path = str(path)
         self.thread_count = thread
         self.threads = []
         self.timeout = timeout
 
-    def _download(self, url, folder='', filename='', retried=False):
-        logger.info('Start downloading: %s ...' % url)
+    def _download(self, url, folder='', filename='', retried=0):
+        logger.info('Start downloading: {0} ...'.format(url))
         filename = filename if filename else os.path.basename(urlparse(url).path)
+        base_filename, extension = os.path.splitext(filename)
         try:
-            with open(os.path.join(folder, filename), "wb") as f:
-                response = requests.get(url, stream=True, timeout=self.timeout)
+            if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
+                logger.warning('File: {0} existed, ignore.'.format(os.path.join(folder, base_filename.zfill(3) +
+                                                                                extension)))
+                return 1, url
+
+            with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
+                response = request('get', url, stream=True, timeout=self.timeout)
+                if response.status_code != 200:
+                    raise NhentaiImageNotExistException
                 length = response.headers.get('content-length')
                 if length is None:
                     f.write(response.content)
                 else:
                     for chunk in response.iter_content(2048):
                         f.write(chunk)
-        except requests.HTTPError as e:
-            if not retried:
-                logger.error('Error: %s, retrying' % str(e))
-                return self._download(url=url, folder=folder, filename=filename, retried=True)
+
+        except (requests.HTTPError, requests.Timeout) as e:
+            if retried < 3:
+                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
+                return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
             else:
-                return None
+                return 0, None
+
+        except NhentaiImageNotExistException as e:
+            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
+            return -1, url
+
         except Exception as e:
-            logger.critical('CRITICAL: %s' % str(e))
-            return None
-        return url
+            logger.critical(str(e))
+            return 0, None
+
+        return 1, url
 
     def _download_callback(self, request, result):
-        if not result:
-            logger.critical('Too many errors occurred, quit.')
-            raise SystemExit
-        logger.log(15, '%s download successfully' % result)
+        result, data = result
+        if result == 0:
+            logger.warning('fatal errors occurred, ignored')
+            # exit(1)
+        elif result == -1:
+            logger.warning('url {} return status code 404'.format(data))
+        else:
+            logger.log(15, '{0} download successfully'.format(data))
 
     def download(self, queue, folder=''):
-        if not isinstance(folder, (str, unicode)):
+        if not isinstance(folder, text):
             folder = str(folder)
 
         if self.path:
             folder = os.path.join(self.path, folder)
 
         if not os.path.exists(folder):
-            logger.warn('Path \'%s\' not exist.' % folder)
+            logger.warn('Path \'{0}\' not exist.'.format(folder))
             try:
                 os.makedirs(folder)
             except EnvironmentError as e:
-                logger.critical('Error: %s' % str(e))
-                raise SystemExit
+                logger.critical('{0}'.format(str(e)))
+                exit(1)
         else:
-            logger.warn('Path \'%s\' already exist.' % folder)
+            logger.warn('Path \'{0}\' already exist.'.format(folder))
 
         queue = [([url], {'folder': folder}) for url in queue]