]> git.lizzy.rs Git - nhentai.git/blob - nhentai/downloader.py
remove print
[nhentai.git] / nhentai / downloader.py
# coding: utf-8
2 from __future__ import unicode_literals, print_function
3
4 import signal
5
6 from future.builtins import str as text
7 import os
8 import requests
9 import threadpool
10 import time
11 import multiprocessing as mp
12
13 try:
14     from urllib.parse import urlparse
15 except ImportError:
16     from urlparse import urlparse
17
18 from nhentai.logger import logger
19 from nhentai.parser import request
20 from nhentai.utils import Singleton, signal_handler
21
# Turn off urllib3's warnings for the whole process.
requests.packages.urllib3.disable_warnings()
# Module-level semaphore used as a "keep going" flag: download_wrapper() only
# starts a download while get_value() is non-zero, and subprocess_signal()
# acquires it (without releasing it) when a worker receives SIGINT.
semaphore = mp.Semaphore()
24
25
class NHentaiImageNotExistException(Exception):
    """Raised when an image request is answered with a non-200 status code."""
28
29
class Downloader(Singleton):
    """Download a list of image URLs into a folder using a process pool.

    Every download reports a ``(status, data)`` tuple:

    * ``(1, url)``   -- the file was downloaded or already existed
    * ``(0, None)``  -- the download failed
    * ``(-1, url)``  -- the server answered with a non-200 status code
    * ``(-3, None)`` -- the download was interrupted or skipped after Ctrl-C
    """

    def __init__(self, path='', thread=1, timeout=30, delay=0):
        """Configure the downloader.

        :param path: base directory prepended to the folder given to download()
        :param thread: number of pool workers, an int from 1 to 15
        :param timeout: timeout passed to every HTTP request
        :param delay: seconds to sleep before each download (0 disables it)
        :raises ValueError: if ``thread`` is not an int in the range 1..15
        """
        if not isinstance(thread, int) or not 1 <= thread <= 15:
            raise ValueError('Invalid threads count')

        self.path = str(path)
        self.thread_count = thread
        self.threads = []
        self.timeout = timeout
        self.delay = delay

    def _fetch(self, url, attempts=10):
        """Request ``url`` as a stream, retrying on request errors.

        :returns: the response, or None once ``attempts`` requests have failed
        :raises NHentaiImageNotExistException: on a non-200 status code
        """
        for attempt in range(1, attempts + 1):
            try:
                response = request('get', url, stream=True, timeout=self.timeout)
            except Exception as e:
                if attempt == attempts:
                    logger.critical(str(e))
                    return None
                continue

            # A non-200 answer is final: it is reported, not retried.
            if response.status_code != 200:
                raise NHentaiImageNotExistException
            return response

        return None

    @staticmethod
    def _discard_partial(path):
        """Best-effort removal of an incomplete download."""
        try:
            os.remove(path)
        except OSError:
            # Nothing was written, or the file is already gone.
            pass

    def download_(self, url, folder='', filename='', retried=0):
        """Download one URL into ``folder`` and return a ``(status, data)`` tuple.

        The target name is ``filename`` (default: the last path component of
        the URL) with its stem zero-padded to three characters. An existing
        target is left untouched and reported as success.

        :param url: URL of the file to download
        :param folder: directory the file is written to
        :param filename: optional file name overriding the one from the URL
        :param retried: number of retries already performed (at most 3)
        :returns: ``(1, url)``, ``(0, None)``, ``(-1, url)`` or ``(-3, None)``
        """
        if self.delay:
            time.sleep(self.delay)
        logger.info('Starting to download {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        target = os.path.join(folder, base_filename.zfill(3) + extension)
        try:
            if os.path.exists(target):
                logger.warning('File: {0} exists, ignoring'.format(target))
                return 1, url

            # Fetch before opening the target so that a failed request never
            # leaves an empty file behind (it would be skipped as "exists"
            # on the next run).
            response = self._fetch(url)
            if response is None:
                return 0, None

            try:
                with open(target, "wb") as f:
                    if response.headers.get('content-length') is None:
                        f.write(response.content)
                    else:
                        for chunk in response.iter_content(2048):
                            f.write(chunk)
            except BaseException:
                # Never keep a truncated file: a retry or a later run would
                # treat it as an already finished download.
                self._discard_partial(target)
                raise

        except (requests.HTTPError, requests.Timeout) as e:
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                # Report the outcome of the retry itself.
                return self.download_(url=url, folder=folder, filename=filename, retried=retried + 1)
            return 0, None

        except NHentaiImageNotExistException:
            return -1, url

        except Exception as e:
            import traceback
            # print_exc() shows where the exception was raised; print_stack()
            # would only show how this handler was reached.
            traceback.print_exc()
            logger.critical(str(e))
            return 0, None

        except KeyboardInterrupt:
            return -3, None

        return 1, url

    def _download_callback(self, result):
        """Log the outcome of one finished download.

        :param result: the ``(status, data)`` tuple returned by the worker
        """
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
        elif result == -2:
            logger.warning('Ctrl-C pressed, exiting sub processes ...')
        elif result == -3:
            # The worker did not run the download; nothing to report.
            pass
        else:
            logger.log(15, '{0} downloaded successfully'.format(data))

    def download(self, queue, folder=''):
        """Download every URL in ``queue`` into ``folder`` and wait for completion.

        ``folder`` is joined onto ``self.path`` when a base path is set, and is
        created if it does not exist.

        :param queue: iterable of URLs
        :param folder: destination directory
        :raises SystemExit: with code 1 if the folder cannot be created
        """
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))
                # Same effect as exit(1) without depending on the `site` builtin.
                raise SystemExit(1)
        else:
            logger.warning('Path \'{0}\' already exist.'.format(folder))

        tasks = [(self, url, folder) for url in queue]

        pool = mp.Pool(self.thread_count, init_worker)
        for task in tasks:
            pool.apply_async(download_wrapper, args=task, callback=self._download_callback)

        pool.close()
        pool.join()
144
145
def download_wrapper(obj, url, folder=''):
    """Pool task: download ``url`` unless the module semaphore reads zero.

    :param obj: the Downloader instance whose download_() is invoked
    :param url: URL to download
    :param folder: destination directory
    :returns: the tuple from Downloader.download_(), or ``(-3, None)`` when
        the semaphore value is zero and the download is skipped
    """
    try:
        interrupted = not semaphore.get_value()
    except NotImplementedError:
        # get_value() is not implemented on macOS (no sem_getvalue());
        # carry on downloading instead of failing every task.
        interrupted = False

    if interrupted:
        return -3, None
    return Downloader.download_(obj, url=url, folder=folder)
151
152
def init_worker():
    """Install subprocess_signal as the SIGINT handler of the calling process."""
    handler = subprocess_signal
    signal.signal(signal.SIGINT, handler)
155
156
def subprocess_signal(signal, frame):
    """SIGINT handler: log once the semaphore is acquired, then raise KeyboardInterrupt.

    The module semaphore is acquired with a one second timeout and never
    released here; the warning is logged only when the acquire succeeds.
    KeyboardInterrupt is raised in either case.
    """
    acquired = semaphore.acquire(timeout=1)
    if acquired:
        logger.warning('Ctrl-C pressed, exiting sub processes ...')

    raise KeyboardInterrupt