5 from urlparse import urlparse
6 from logger import logger
7 from parser import request
# Downloader: saves URLs to files inside a target folder, running the
# downloads through a threadpool.ThreadPool sized by `thread_count`.
# NOTE(review): the embedded line numbers in this fragment skip values, so
# some statements (branch conditions, `try:` openers, returns) are not
# visible here. Comments below describe only what the visible lines show.
10 class Downloader(object):
# Instance creation hook: the freshly created object is stored on the class
# as `cls._instance`.
# NOTE(review): looks like a singleton, but the guard and the return
# statement are not visible in this fragment -- confirm.
# NOTE(review): *args/**kwargs are forwarded to the base `__new__`; the base
# is `object`, whose `__new__` rejects extra arguments on Python 3.
13 def __new__(cls, *args, **kwargs):
15 cls._instance = super(Downloader, cls).__new__(cls, *args, **kwargs)
# Initializer.
#   path    -- default '' ; self.path is read in download(), but its
#              assignment is not visible here (TODO confirm).
#   thread  -- worker count; must be an int between 1 and 10 inclusive.
#   timeout -- passed as `timeout=` to the HTTP request in _download().
# Raises ValueError('Invalid threads count') when `thread` is not an int or
# lies outside 1..10.
18 def __init__(self, path='', thread=1, timeout=30):
19 if not isinstance(thread, (int, )) or thread < 1 or thread > 10:
20 raise ValueError('Invalid threads count')
22 self.thread_count = thread
24 self.timeout = timeout
# Download one URL into `folder`.
#   url      -- address to fetch.
#   folder   -- directory joined with the file name to form the target path.
#   filename -- target file name; when empty, the basename of the URL's path
#               component is used instead.
#   retried  -- set to True by the recursive retry call below.
# NOTE(review): `os` and `requests` are used here but not imported in the
# visible lines -- presumably imported on lines missing from this fragment.
26 def _download(self, url, folder='', filename='', retried=False):
27 logger.info('Start downloading: {} ...'.format(url))
28 filename = filename if filename else os.path.basename(urlparse(url).path)
# The target file is opened for binary writing before the request is issued.
# NOTE(review): if the request is inside this `with` (indentation is lost
# here), a failed request would still leave a created/truncated file.
30 with open(os.path.join(folder, filename), "wb") as f:
# Streaming GET, bounded by the timeout stored on the instance.
31 response = request('get', url, stream=True, timeout=self.timeout)
32 length = response.headers.get('content-length')
# Two write paths exist: the whole body at once, or 2048-byte chunks.
# NOTE(review): the condition choosing between them (presumably based on
# `length`) and the chunk write itself are not visible -- confirm.
34 f.write(response.content)
36 for chunk in response.iter_content(2048):
# HTTP errors: log and call _download again with retried=True.
# NOTE(review): the check that limits this to a single retry is not visible.
38 except requests.HTTPError as e:
40 logger.error('Error: {}, retrying'.format(str(e)))
41 return self._download(url=url, folder=folder, filename=filename, retried=True)
# Any other exception is logged at CRITICAL level.
44 except Exception as e:
45 logger.critical(str(e))
# Callback handed to threadpool.makeRequests in download(); receives the
# work request and the value returned by _download.
# Logs either a critical "too many errors" message or a level-15 success
# message containing `result`.
# NOTE(review): the condition separating the two log calls is not visible.
# NOTE(review): the `request` parameter shadows the `request` function
# imported from `parser` inside this method.
49 def _download_callback(self, request, result):
51 logger.critical('Too many errors occurred, quit.')
53 logger.log(15, '{} download successfully'.format(result))
# Download every URL in `queue` into `folder` using the thread pool and
# block until all queued work has finished.
#   queue  -- iterable of URLs.
#   folder -- destination directory, joined onto self.path.
55 def download(self, queue, folder=''):
# `unicode` is the Python 2 text type; the action taken for a non-string
# folder is not visible in this fragment.
56 if not isinstance(folder, (str, unicode)):
60 folder = os.path.join(self.path, folder)
62 if not os.path.exists(folder):
63 logger.warn('Path \'{}\' not exist.'.format(folder))
# NOTE(review): the statement guarded by this handler (presumably directory
# creation) is not visible -- confirm.
66 except EnvironmentError as e:
67 logger.critical('Error: {}'.format(str(e)))
70 logger.warn('Path \'{}\' already exist.'.format(folder))
# Each work item is (positional args, keyword args) for _download:
# the URL positionally and the destination folder by keyword.
72 queue = [([url], {'folder': folder}) for url in queue]
74 self.thread_pool = threadpool.ThreadPool(self.thread_count)
75 requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
# The list comprehension is used only for its side effect of enqueueing
# every request; the resulting list is discarded.
76 [self.thread_pool.putRequest(req) for req in requests_]
# Block until the pool has processed all queued requests.
78 self.thread_pool.wait()