git.lizzy.rs Git - nhentai.git/blob - nhentai/downloader.py
Switching 'logger.warn' to 'logger.warning'
[nhentai.git] / nhentai / downloader.py
1 # coding: utf-8
2
3 import multiprocessing
4 import signal
5
6 from future.builtins import str as text
7 import sys
8 import os
9 import requests
10 import time
11
12 try:
13     from urllib.parse import urlparse
14 except ImportError:
15     from urlparse import urlparse
16
17 from nhentai.logger import logger
18 from nhentai.parser import request
19 from nhentai.utils import Singleton
20
21 requests.packages.urllib3.disable_warnings()  # runs once at import time; turns off urllib3's warnings
22 semaphore = multiprocessing.Semaphore(1)  # Ctrl-C flag: the SIGINT handler acquires it, and download_wrapper skips work once its value is 0
23
24
class NHentaiImageNotExistException(Exception):
    """Raised by the downloader when an image request answers with a non-200 status."""
27
28
class Downloader(Singleton):
    """Download image URLs into a folder using a pool of worker processes.

    ``download_`` reports its outcome as a ``(code, data)`` tuple:

    * ``1``  -- the file was written, or already existed and was skipped
      (``data`` is the URL);
    * ``0``  -- the download failed (``data`` is ``None``);
    * ``-1`` -- the server answered with a non-200 status (``data`` is the URL);
    * ``-3`` -- the worker was interrupted by Ctrl-C (``data`` is ``None``).
    """

    def __init__(self, path='', size=5, timeout=30, delay=0):
        """Store the download settings.

        :param path: base directory that ``download`` prepends to its folder
        :param size: number of worker processes in the pool
        :param timeout: timeout handed to every image request
        :param delay: seconds to sleep before each single download (0 = none)
        """
        self.size = size
        self.path = str(path)
        self.timeout = timeout
        self.delay = delay

    def _request_with_retries(self, url, attempts=10):
        """Request ``url`` as a stream, trying up to ``attempts`` times.

        :return: the response object, or ``None`` when every attempt raised
        :raises NHentaiImageNotExistException: on a non-200 status code
        """
        for attempt in range(1, attempts + 1):
            try:
                response = request('get', url, stream=True, timeout=self.timeout)
            except Exception as e:
                # Any error from the request itself is retried here, so only
                # errors raised later (while reading the body) reach the
                # retry logic in download_.
                if attempt == attempts:
                    logger.critical(str(e))
                    return None
                continue

            if response.status_code != 200:
                raise NHentaiImageNotExistException
            return response

        return None

    @staticmethod
    def _remove_partial(path):
        """Delete an incomplete download; a missing file is not an error."""
        try:
            os.remove(path)
        except OSError:
            pass

    def download_(self, url, folder='', filename='', retried=0):
        """Download one URL into ``folder`` and report the outcome.

        The file name defaults to the last path component of ``url``; its
        stem is zero-padded to three characters.  An already existing target
        is skipped.  A file created by this call is removed again whenever
        the download does not complete, so a broken file is never mistaken
        for a finished one on the next run.

        :param url: address of the image
        :param folder: directory the file is written to
        :param filename: optional file name overriding the one from ``url``
        :param retried: number of retries already made (used by the recursion)
        :return: ``(code, data)`` tuple, see the class docstring
        """
        if self.delay:
            time.sleep(self.delay)
        logger.info('Starting to download {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)

        target = None
        # Set right before this call creates the target file, so the error
        # handlers below only ever delete a file this call produced.
        owns_target = False
        try:
            target = os.path.join(folder, base_filename.zfill(3) + extension)
            if os.path.exists(target):
                logger.warning('File: {0} exists, ignoring'.format(target))
                return 1, url

            owns_target = True
            with open(target, "wb") as f:
                response = self._request_with_retries(url)
                if response is not None:
                    length = response.headers.get('content-length')
                    if length is None:
                        f.write(response.content)
                    else:
                        for chunk in response.iter_content(2048):
                            f.write(chunk)

            if response is None:
                # All request attempts failed: drop the empty file (after it
                # has been closed) instead of leaving it behind.
                self._remove_partial(target)
                return 0, None

        except (requests.HTTPError, requests.Timeout) as e:
            # The partial file must go before retrying, otherwise the retry
            # would see it and report "exists, ignoring".
            if owns_target:
                self._remove_partial(target)
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                # Hand back the retry's own result so a successful retry is
                # reported as a success.
                return self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
            return 0, None

        except NHentaiImageNotExistException:
            if owns_target:
                self._remove_partial(target)
            return -1, url

        except Exception as e:
            import traceback
            # print_exc shows where the exception was raised; print_stack
            # would only show how this handler was reached.
            traceback.print_exc()
            logger.critical(str(e))
            if owns_target:
                self._remove_partial(target)
            return 0, None

        except KeyboardInterrupt:
            if owns_target:
                self._remove_partial(target)
            return -3, None

        return 1, url

    def _download_callback(self, result):
        """Log the outcome of one ``download_`` call.

        :param result: ``(code, data)`` tuple, see the class docstring
        """
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
        elif result == -2:
            logger.warning('Ctrl-C pressed, exiting sub processes ...')
        elif result == -3:
            # workers won't be run, nothing to report
            pass
        else:
            logger.log(15, '{0} downloaded successfully'.format(data))

    def download(self, queue, folder=''):
        """Download every URL in ``queue`` into ``folder`` with a process pool.

        ``folder`` is joined onto ``self.path`` when a base path is set and
        is created when it does not exist yet.  The call blocks until all
        workers have finished.

        :param queue: iterable of image URLs
        :param folder: target directory, relative to ``self.path`` if set
        """
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))

        else:
            logger.warning('Path \'{0}\' already exist.'.format(folder))

        tasks = [(self, url, folder) for url in queue]

        pool = multiprocessing.Pool(self.size, init_worker)
        # NOTE(review): the async results are discarded and _download_callback
        # is not registered here, so per-file outcomes are only visible through
        # the log lines written by the workers themselves.
        for task in tasks:
            pool.apply_async(download_wrapper, args=task)

        pool.close()
        pool.join()
138
139
def download_wrapper(obj, url, folder=''):
    """Run ``Downloader.download_`` for one URL unless Ctrl-C was already seen.

    On every platform except ``darwin`` the module-level semaphore is
    consulted first: once its value has dropped to zero the download is
    skipped and ``(-3, None)`` is returned.  On ``darwin`` the semaphore value
    is never queried and the download always runs.
    """
    # Same short-circuit order as a plain "darwin or get_value()" test:
    # get_value() is not called at all on darwin.
    interrupted = sys.platform != 'darwin' and not semaphore.get_value()
    if interrupted:
        return -3, None
    return Downloader.download_(obj, url=url, folder=folder)
145
146
def init_worker():
    """Pool initializer: route SIGINT in the calling process to ``subprocess_signal``."""
    signal.signal(signal.SIGINT, subprocess_signal)
149
150
def subprocess_signal(signal, frame):
    """SIGINT handler for workers: announce Ctrl-C once, then interrupt.

    The handler tries for up to one second to take the module-level
    semaphore; only a process that gets it logs the warning.  Every caller
    ends by raising ``KeyboardInterrupt``.

    :param signal: signal number (shadows the ``signal`` module in this scope)
    :param frame: current stack frame, unused
    """
    got_semaphore = semaphore.acquire(timeout=1)
    if got_semaphore:
        logger.warning('Ctrl-C pressed, exiting sub processes ...')

    raise KeyboardInterrupt