]> git.lizzy.rs Git - nhentai.git/blob - nhentai/downloader.py
fix bug on mac #126
[nhentai.git] / nhentai / downloader.py
1 # coding: utf-8
2 from __future__ import unicode_literals, print_function
3
4 import multiprocessing
5 import signal
6
7 from future.builtins import str as text
8 import sys
9 import os
10 import requests
11 import time
12
13 try:
14     from urllib.parse import urlparse
15 except ImportError:
16     from urlparse import urlparse
17
18 from nhentai.logger import logger
19 from nhentai.parser import request
20 from nhentai.utils import Singleton
21
# Silence urllib3 warnings emitted through requests for the whole process.
requests.packages.urllib3.disable_warnings()

# Process-shared flag with a single permit: a worker's SIGINT handler
# (subprocess_signal) acquires it, and download_wrapper reads its value
# before starting a download.
semaphore = multiprocessing.Semaphore(value=1)
24
25
class NHentaiImageNotExistException(Exception):
    """Raised when an image request is answered with a non-200 status code."""
28
29
class Downloader(Singleton):
    """Download image URLs into a folder using a pool of worker processes.

    ``download_`` reports its outcome as a ``(code, data)`` tuple:

    *  1 -- file downloaded, or it already existed (``data`` is the url)
    *  0 -- download failed (``data`` is ``None``)
    * -1 -- server answered with a non-200 status (``data`` is the url)
    * -3 -- interrupted by Ctrl-C (``data`` is ``None``)
    """

    def __init__(self, path='', size=5, timeout=30, delay=0):
        """
        :param path: base directory joined in front of the folder passed to
                     ``download``; empty means "use the folder as given"
        :param size: number of worker processes in the pool
        :param timeout: timeout value handed to every ``request`` call
        :param delay: seconds to sleep before each download (0 disables)
        """
        self.size = size
        self.path = str(path)
        self.timeout = timeout
        self.delay = delay

    @staticmethod
    def _remove_partial(path):
        """Best-effort removal of an incomplete download; ``None`` is a no-op."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            # Nothing was written, or the file is already gone.
            pass

    def _request_image(self, url, attempts=10):
        """Request ``url``, retrying on any request error.

        :return: the response, or ``None`` once ``attempts`` tries have failed
                 (the last error is logged as critical)
        :raises NHentaiImageNotExistException: on a non-200 status code
        """
        last_error = None
        for _ in range(attempts):
            try:
                response = request('get', url, stream=True, timeout=self.timeout)
            except Exception as e:
                last_error = e
                continue

            if response.status_code != 200:
                raise NHentaiImageNotExistException
            return response

        logger.critical(str(last_error))
        return None

    def download_(self, url, folder='', filename='', retried=0):
        """Download a single ``url`` into ``folder``.

        The file name defaults to the last path component of the url; its
        stem is zero-padded to three characters. An existing file is never
        overwritten. A file that could not be written completely is removed
        again, so a later run does not mistake it for a finished download.

        :param url: address of the image
        :param folder: destination directory
        :param filename: overrides the name derived from ``url``
        :param retried: number of retries already performed (internal)
        :return: ``(code, data)`` status tuple, see the class docstring
        """
        if self.delay:
            time.sleep(self.delay)
        logger.info('Starting to download {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        target = os.path.join(folder, base_filename.zfill(3) + extension)

        # Set only once this call has started writing ``target``; a file that
        # existed before the call must never be deleted by the error handlers.
        partial = None
        try:
            if os.path.exists(target):
                logger.warning('File: {0} exists, ignoring'.format(target))
                return 1, url

            # Fetch first, open afterwards: a failed request must not leave
            # an empty file behind.
            response = self._request_image(url)
            if response is None:
                return 0, None

            partial = target
            with open(target, "wb") as f:
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)

        except (requests.HTTPError, requests.Timeout) as e:
            # Drop the truncated file, otherwise the retry would see it,
            # report "exists" and skip the download.
            self._remove_partial(partial)
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                return self.download_(url=url, folder=folder, filename=filename, retried=retried + 1)
            return 0, None

        except NHentaiImageNotExistException:
            self._remove_partial(partial)
            return -1, url

        except Exception as e:
            import traceback
            # print_exc shows where the exception was raised; print_stack
            # would only show how this handler was reached.
            traceback.print_exc()
            logger.critical(str(e))
            self._remove_partial(partial)
            return 0, None

        except KeyboardInterrupt:
            self._remove_partial(partial)
            return -3, None

        return 1, url

    def _download_callback(self, result):
        """Log the outcome of one download.

        :param result: ``(code, data)`` tuple as returned by ``download_``;
                       code -2 is also recognised here
        """
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
            # exit(1)
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
        elif result == -2:
            logger.warning('Ctrl-C pressed, exiting sub processes ...')
        elif result == -3:
            # workers wont be run, just pass
            pass
        else:
            logger.log(15, '{0} downloaded successfully'.format(data))

    def download(self, queue, folder=''):
        """Download every url in ``queue`` into ``folder`` with a process pool.

        ``folder`` is joined onto ``self.path`` when a base path is set and is
        created if missing. A failure to create it is logged and the downloads
        are still attempted. Blocks until all workers have finished.

        :param queue: iterable of urls
        :param folder: destination directory (relative to ``self.path``)
        """
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))

        else:
            logger.warning('Path \'{0}\' already exist.'.format(folder))

        pool = multiprocessing.Pool(self.size, init_worker)
        for url in queue:
            pool.apply_async(download_wrapper, args=(self, url, folder))

        pool.close()
        pool.join()
139
140
def download_wrapper(obj, url, folder=''):
    """Pool entry point: run ``Downloader.download_`` unless interrupted.

    On every platform except macOS ('darwin') the module-level semaphore is
    consulted first; when its value is zero the download is skipped and
    ``(-3, None)`` is returned. On macOS the semaphore value is never read.
    """
    interrupted = sys.platform != 'darwin' and not semaphore.get_value()
    if interrupted:
        return -3, None

    return Downloader.download_(obj, url=url, folder=folder)
146
147
def init_worker():
    """Pool initializer: route SIGINT in the worker to ``subprocess_signal``."""
    sigint_handler = subprocess_signal
    signal.signal(signal.SIGINT, sigint_handler)
150
151
def subprocess_signal(signal, frame):
    """SIGINT handler: log the interruption once, then raise KeyboardInterrupt.

    Only the caller that manages to acquire the module-level semaphore
    (waiting at most one second) emits the warning; every caller raises.
    """
    got_permit = semaphore.acquire(timeout=1)
    if got_permit:
        logger.warning('Ctrl-C pressed, exiting sub processes ...')

    raise KeyboardInterrupt