nhentai/downloader.py
# coding: utf-8

import multiprocessing
import signal

from future.builtins import str as text
import sys
import os
import requests
import time

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

from nhentai import constant
from nhentai.logger import logger
from nhentai.parser import request
from nhentai.utils import Singleton

requests.packages.urllib3.disable_warnings()
semaphore = multiprocessing.Semaphore(1)


class NHentaiImageNotExistException(Exception):
    pass


class Downloader(Singleton):

    def __init__(self, path='', size=5, timeout=30, delay=0):
        self.size = size
        self.path = str(path)
        self.timeout = timeout
        self.delay = delay

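    # download_ reports its outcome as a (status, data) tuple: 1 for a saved or
    # already-existing file, 0 for a fatal error, -1 when the server does not
    # return HTTP 200, and -3 when the worker was interrupted.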
    def download_(self, url, folder='', filename='', retried=0, proxy=None):
        if self.delay:
            time.sleep(self.delay)
        logger.info('Starting to download {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        try:
            if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
                logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
                                                                                extension)))
                return 1, url

            response = None
            with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
                i = 0
                while i < 10:
                    try:
                        response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
                        if response.status_code != 200:
                            raise NHentaiImageNotExistException

                    except NHentaiImageNotExistException as e:
                        raise e

                    except Exception as e:
                        i += 1
                        if not i < 10:
                            logger.critical(str(e))
                            return 0, None
                        continue

                    break

                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)

        except (requests.HTTPError, requests.Timeout) as e:
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                return 0, self.download_(url=url, folder=folder, filename=filename,
                                         retried=retried+1, proxy=proxy)
            else:
                return 0, None

        except NHentaiImageNotExistException as e:
            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
            return -1, url

        except Exception as e:
            import traceback
            traceback.print_stack()
            logger.critical(str(e))
            return 0, None

        except KeyboardInterrupt:
            return -3, None

        return 1, url

    def _download_callback(self, result):
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
            # exit(1)
        elif result == -1:
            logger.warning('url {} returned status code 404'.format(data))
        elif result == -2:
            logger.warning('Ctrl-C pressed, exiting sub processes ...')
        elif result == -3:
            # workers won't be run, just pass
            pass
        else:
            logger.log(15, '{0} downloaded successfully'.format(data))

    def download(self, queue, folder='', regenerate_cbz=False):
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if os.path.exists(folder + '.cbz'):
            if not regenerate_cbz:
                logger.warning('CBZ file \'{}.cbz\' exists, ignoring download request'.format(folder))
                return

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))

        else:
            logger.warning('Path \'{0}\' already exists.'.format(folder))

        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

        pool = multiprocessing.Pool(self.size, init_worker)
        [pool.apply_async(download_wrapper, args=item) for item in queue]

        pool.close()
        pool.join()


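# The module-level semaphore coordinates Ctrl-C handling across pool workers:
# subprocess_signal() grabs it on the first SIGINT, after which download_wrapper()
# stops dispatching downloads. Semaphore.get_value() is not implemented on macOS,
# hence the 'darwin' check that bypasses it there.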
def download_wrapper(obj, url, folder='', proxy=None):
    if sys.platform == 'darwin' or semaphore.get_value():
        return Downloader.download_(obj, url=url, folder=folder, proxy=proxy)
    else:
        return -3, None


def init_worker():
    signal.signal(signal.SIGINT, subprocess_signal)


def subprocess_signal(signal, frame):
    if semaphore.acquire(timeout=1):
        logger.warning('Ctrl-C pressed, exiting sub processes ...')

    raise KeyboardInterrupt
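

# A minimal usage sketch (hypothetical path, URL, and gallery folder; assumes
# constant.CONFIG['proxy'] has been populated, as the nhentai CLI normally does):
#
#   downloader = Downloader(path='/tmp/nhentai', size=5, timeout=30, delay=1)
#   downloader.download(['https://i.nhentai.net/galleries/12345/1.jpg'], folder='12345')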