]> git.lizzy.rs Git - nhentai.git/blob - nhentai/downloader.py
Merge pull request #21 from mentaterasmus/master
[nhentai.git] / nhentai / downloader.py
# coding: utf-8
2 from __future__ import unicode_literals, print_function
3 from future.builtins import str as text
4 import os
5 import requests
6 import threadpool
7 try:
8     from urllib.parse import urlparse
9 except ImportError:
10     from urlparse import urlparse
11
12 from nhentai.logger import logger
13 from nhentai.parser import request
14 from nhentai.utils import Singleton
15
16
# Module-level side effect: turn off the warnings issued by the urllib3
# package bundled with requests, so they do not clutter the download log.
requests.packages.urllib3.disable_warnings()
18
19
class NhentaiImageNotExistException(Exception):
    """Signal that an image request did not come back with HTTP status 200."""
22
23
class Downloader(Singleton):
    """Download a list of image URLs into a folder with a pool of threads.

    NOTE(review): the behaviour contributed by the ``Singleton`` base class
    (imported from ``nhentai.utils``) is not visible in this file.
    """

    def __init__(self, path='', thread=1, timeout=30):
        """Store the download settings.

        :param path: base directory that ``download`` prepends to its
            ``folder`` argument; an empty value means "no base directory".
        :param thread: number of worker threads, an int between 1 and 15.
        :param timeout: timeout passed to every image request.
        :raises ValueError: if ``thread`` is not an int in the range 1..15.
        """
        if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
            raise ValueError('Invalid threads count')
        self.path = str(path)
        self.thread_count = thread
        self.threads = []
        self.timeout = timeout

    @staticmethod
    def _remove_partial(path):
        """Best-effort removal of a file left behind by a failed download."""
        try:
            os.remove(path)
        except OSError:
            # Nothing was written (or it is already gone): nothing to clean.
            pass

    def _download(self, url, folder='', filename='', retried=0):
        """Fetch ``url`` and store it inside ``folder``.

        The file name defaults to the last path component of ``url``; its
        stem is zero-padded to at least three characters.

        :param url: address of the image to fetch.
        :param folder: directory the file is written to.
        :param filename: optional file name overriding the one from ``url``.
        :param retried: number of attempts already made (internal counter).
        :return: a ``(status, data)`` tuple consumed by
            ``_download_callback``: ``(1, url)`` when the file was saved or
            already existed, ``(-1, url)`` when the response status was not
            200, ``(0, None)`` when the download failed.
        """
        logger.info('Start downloading: {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        target = os.path.join(folder, base_filename.zfill(3) + extension)

        if os.path.exists(target):
            logger.warning('File: {0} existed, ignore.'.format(target))
            return 1, url

        try:
            # Ask the server first and only create the file once the
            # response is known to be good; otherwise a failed request would
            # leave an empty file that later looks like a finished download.
            response = request('get', url, stream=True, timeout=self.timeout)
            if response.status_code != 200:
                raise NhentaiImageNotExistException

            with open(target, "wb") as f:
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)

        except (requests.HTTPError, requests.Timeout) as e:
            # Drop any partial file so the retry is not short-circuited by
            # the "already exists" check above.
            self._remove_partial(target)
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                # Hand back the retry's own result so that a successful
                # retry is reported as a success.
                return self._download(url=url, folder=folder, filename=filename, retried=retried+1)
            return 0, None

        except NhentaiImageNotExistException:
            # The file is never created for a non-200 response, so there is
            # nothing to remove here.
            return -1, url

        except Exception as e:
            self._remove_partial(target)
            logger.critical(str(e))
            return 0, None

        return 1, url

    def _download_callback(self, request, result):
        """Log the outcome of one finished download.

        :param request: the work request that finished (unused).
        :param result: the ``(status, data)`` tuple returned by ``_download``.
        """
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
            # exit(1)
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
        else:
            logger.log(15, '{0} download successfully'.format(data))

    def download(self, queue, folder=''):
        """Download every URL in ``queue`` into ``folder`` and wait for all.

        ``folder`` is joined onto ``self.path`` when a base path was given,
        and is created if it does not exist yet. The process exits with
        status 1 if the directory cannot be created.

        :param queue: iterable of image URLs.
        :param folder: destination directory (relative to ``self.path``).
        """
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' not exist.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))
                exit(1)
        else:
            logger.warning('Path \'{0}\' already exist.'.format(folder))

        # Each work item is (positional args, keyword args) for _download.
        queue = [([url], {'folder': folder}) for url in queue]

        self.thread_pool = threadpool.ThreadPool(self.thread_count)
        requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
        for req in requests_:
            self.thread_pool.putRequest(req)

        self.thread_pool.wait()