5 from urlparse import urlparse
6 from logger import logger
class Downloader(object):
    """Download a batch of URLs into a folder using a pool of worker threads.

    The class is a singleton: every ``Downloader(...)`` call returns the same
    object. ``__init__`` still runs on each call, so the most recent arguments
    reconfigure the shared instance.
    """

    # The single shared instance, created lazily by __new__.
    _instance = None

    # String types accepted as folder names. ``unicode`` only exists on
    # Python 2; on Python 3 the lookup raises NameError and ``str`` suffices.
    try:
        _STRING_TYPES = (str, unicode)  # noqa: F821
    except NameError:
        _STRING_TYPES = (str,)

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            # object.__new__ must not receive the constructor arguments:
            # forwarding them raises TypeError on Python 3.
            cls._instance = super(Downloader, cls).__new__(cls)
        return cls._instance

    def __init__(self, path='', thread=1, timeout=30):
        """Configure the downloader.

        Args:
            path: Base directory that ``download`` joins its ``folder`` onto.
            thread: Number of worker threads, an int from 1 to 10 inclusive.
            timeout: Timeout passed to ``requests.get`` for each download.

        Raises:
            ValueError: If ``thread`` is not an int in the range 1..10.
        """
        if not isinstance(thread, int) or not 1 <= thread <= 10:
            raise ValueError('Invalid threads count')
        self.path = path
        self.thread_count = thread
        # Created by download(); nothing is running until then.
        self.thread_pool = None
        self.timeout = timeout

    def _download(self, url, folder='', filename='', retried=False):
        """Fetch ``url`` and save it inside ``folder``.

        Args:
            url: Address to download.
            folder: Directory the file is written to.
            filename: Name of the saved file; defaults to the last component
                of the URL path.
            retried: True when this call is already the retry attempt.

        Returns:
            The file name on success, or None when the download failed.
            NOTE(review): the original return statements were not visible;
            a truthy value on success and a falsy one on failure is what
            ``_download_callback`` relies on -- confirm against the full file.
        """
        logger.info('Start downloading: %s ...' % url)
        filename = filename or os.path.basename(urlparse(url).path)
        target = os.path.join(folder, filename)
        try:
            response = requests.get(url, stream=True, timeout=self.timeout)
            try:
                # Without this call requests never raises HTTPError, so the
                # retry branch below would be dead and an error page would be
                # saved as if it were the requested file.
                response.raise_for_status()
                # Open the file only once the request has succeeded, so a
                # failed request does not leave an empty file behind.
                with open(target, 'wb') as f:
                    # Stream in chunks whether or not content-length is sent;
                    # response.content would buffer the whole body in memory.
                    for chunk in response.iter_content(2048):
                        if chunk:
                            f.write(chunk)
            finally:
                # A streamed response holds its connection until closed.
                response.close()
        except requests.HTTPError as e:
            if retried:
                logger.critical('CRITICAL: %s' % str(e))
                return None
            logger.error('Error: %s, retrying' % str(e))
            return self._download(url=url, folder=folder, filename=filename, retried=True)
        except Exception as e:
            # Worker boundary: report the failure to the callback through the
            # return value instead of letting the exception escape the thread.
            logger.critical('CRITICAL: %s' % str(e))
            return None
        return filename

    def _download_callback(self, request, result):
        """Handle the outcome of one finished ``_download`` job.

        Args:
            request: The thread-pool request object (unused).
            result: Return value of ``_download``.

        Raises:
            SystemExit: When ``result`` is falsy, i.e. the download failed.
            NOTE(review): the exit is inferred from the "quit." log message;
            the original statement was not visible -- confirm.
        """
        if not result:
            logger.critical('Too many errors occurred, quit.')
            raise SystemExit(1)
        logger.log(15, '%s download successfully' % result)

    def download(self, queue, folder=''):
        """Download every URL in ``queue`` into ``folder`` and wait for all.

        Args:
            queue: Iterable of URLs.
            folder: Target directory, joined onto ``self.path`` when that is
                set. An empty result means the current working directory.

        Raises:
            ValueError: If ``folder`` is not a string.
            EnvironmentError: If the target directory cannot be created.
            NOTE(review): both failure behaviours are reconstructed (the
            original statements were not visible) -- confirm.
        """
        if not isinstance(folder, self._STRING_TYPES):
            raise ValueError('Invalid folder')

        if self.path:
            folder = os.path.join(self.path, folder)

        # An empty folder means the current directory, which needs no
        # creation; os.makedirs('') would fail.
        if folder:
            if os.path.exists(folder):
                logger.warning('Path \'%s\' already exist.' % folder)
            else:
                logger.warning('Path \'%s\' not exist.' % folder)
                try:
                    os.makedirs(folder)
                except EnvironmentError as e:
                    logger.critical('Error: %s' % str(e))
                    raise

        # threadpool expects (positional_args, keyword_args) per job.
        jobs = [([url], {'folder': folder}) for url in queue]

        self.thread_pool = threadpool.ThreadPool(self.thread_count)
        for job in threadpool.makeRequests(self._download, jobs, self._download_callback):
            self.thread_pool.putRequest(job)

        self.thread_pool.wait()