multithreading download

author ricterz <ricterzheng@gmail.com>

Sun, 19 Apr 2015 05:13:55 +0000 (13:13 +0800)

committer ricterz <ricterzheng@gmail.com>

Sun, 19 Apr 2015 05:13:55 +0000 (13:13 +0800)
author ricterz <ricterzheng@gmail.com>
Sun, 19 Apr 2015 05:13:55 +0000 (13:13 +0800)
committer ricterz <ricterzheng@gmail.com>
Sun, 19 Apr 2015 05:13:55 +0000 (13:13 +0800)
diff --git a/hentai/cmdline.py b/hentai/cmdline.py

index 22e5d9d6e8ceb2e79dcd2a6a8724f84c923384ce..14ba288d2c2c93fce8c12d2e8422f0cbfa284e28 100644 (file)
--- a/hentai/cmdline.py
+++ b/hentai/cmdline.py
@@ -1 +1,22 @@
-__author__ = 'ricter'
+from optparse import OptionParser
+
+
+def cmd_parser():
+    """Parse the command line and return the optparse options object.
+
+    Options: --search (string, stored as ``keyword``), --download
+    (flag, stored as ``is_download``) and --id (int, stored as ``id``).
+    Positional arguments are parsed but discarded.
+    """
+    option_specs = (
+        ('--search', dict(type='string', dest='keyword', action='store')),
+        ('--download', dict(dest='is_download', action='store_true')),
+        ('--id', dict(type='int', dest='id', action='store')),
+    )
+
+    option_parser = OptionParser()
+    for flag, settings in option_specs:
+        option_parser.add_option(flag, **settings)
+
+    options, _positional = option_parser.parse_args()
+    return options
diff --git a/hentai/constant.py b/hentai/constant.py

index 22e5d9d6e8ceb2e79dcd2a6a8724f84c923384ce..8113a374c98785cd257db5921c40238aa199ee9e 100644 (file)
--- a/hentai/constant.py
+++ b/hentai/constant.py
@@ -1 +1,4 @@
-__author__ = 'ricter'
+SCHEMA = 'http://'  # URL scheme prefix shared by every address below
+URL = '%snhentai.net' % SCHEMA  # site root
+DETAIL_URL = '%s/g' % URL  # base for gallery detail pages: DETAIL_URL/<id>
+IMAGE_URL = '%si.nhentai.net/galleries' % SCHEMA  # base for page images: IMAGE_URL/<id>/<page>.jpg
+\ No newline at end of file
diff --git a/hentai/dojinshi.py b/hentai/dojinshi.py

index 22e5d9d6e8ceb2e79dcd2a6a8724f84c923384ce..0cc214cc320acecce8be18440ff9ffc6ddee9e15 100644 (file)
--- a/hentai/dojinshi.py
+++ b/hentai/dojinshi.py
@@ -1 +1,54 @@
-__author__ = 'ricter'
+import Queue
+from constant import DETAIL_URL, IMAGE_URL
+
+
+class Dojinshi(object):
+    """A single gallery: its metadata plus an optional downloader.
+
+    ``downloader`` starts as None and must be assigned by the caller
+    before download() is used; it needs a ``download(queue)`` method.
+    """
+
+    def __init__(self, name=None, subtitle=None, id=None, pages=0):
+        self.name = name
+        self.subtitle = subtitle
+        self.id = id
+        self.pages = pages
+        self.downloader = None
+        # id defaults to None, and '%d' % None raises TypeError, so only
+        # build the detail URL when an id was actually supplied.
+        self.url = None if id is None else '%s/%d' % (DETAIL_URL, id)
+
+    def __repr__(self):
+        return '<Dojinshi: %s>' % self.name
+
+    def show(self):
+        """Print name, subtitle, URL and page count to stdout."""
+        print 'Dojinshi: %s' % self.name
+        print 'Subtitle: %s' % self.subtitle
+        print 'URL: %s' % self.url
+        print 'Pages: %d' % self.pages
+
+    def download(self):
+        """Queue the URL of every page image and pass it to the downloader.
+
+        Raises Exception if no downloader has been assigned.
+        """
+        if not self.downloader:
+            raise Exception('Downloader has not be loaded')
+
+        # Pages are numbered from 1: IMAGE_URL/<id>/<page>.jpg
+        download_queue = Queue.Queue()
+        for page in xrange(1, self.pages + 1):
+            download_queue.put('%s/%d/%d.jpg' % (IMAGE_URL, self.id, page))
+        self.downloader.download(download_queue)
+
+
+if __name__ == '__main__':
+    test = Dojinshi(name='test hentai dojinshi', id=1)
+    print test
+    test.show()
+    try:
+        test.download()
+    except Exception, e:
+        print 'Exception: %s' % str(e)
+\ No newline at end of file
diff --git a/hentai/downloader.py b/hentai/downloader.py

index 22e5d9d6e8ceb2e79dcd2a6a8724f84c923384ce..35767e81912a6e6cc978627fa6cc1ea01c9d6128 100644 (file)
--- a/hentai/downloader.py
+++ b/hentai/downloader.py
@@ -1 +1,52 @@
-__author__ = 'ricter'
+import threading
+import Queue
+
+
+class Downloader(object):
+    """Drain a queue of URLs with a fixed pool of worker threads."""
+
+    def __init__(self):
+        # Workers created by the most recent download() call.
+        self.threads = []
+
+    def _download(self, queue):
+        """Worker loop: take URLs off ``queue`` until it is empty.
+
+        Fetching is not implemented yet; each URL is only printed.
+        """
+        while True:
+            try:
+                # Non-blocking get: with several workers sharing the queue,
+                # Queue.Empty is the only race-free "no work left" signal.
+                url = queue.get(False)
+            except Queue.Empty:
+                break
+            try:
+                print 'Downloading: %s' % url
+            finally:
+                # Exactly one task_done() per successful get(); calling it
+                # without a matching get() can raise ValueError.
+                queue.task_done()
+
+    def download(self, queue):
+        """Run 10 workers over ``queue`` and block until all have exited."""
+        # Build a fresh pool per call: a Thread object cannot be started
+        # twice, so reusing self.threads would break a second download().
+        self.threads = [
+            threading.Thread(target=self._download, args=(queue, ))
+            for _ in range(10)
+        ]
+
+        for worker in self.threads:
+            worker.start()
+
+        for worker in self.threads:
+            worker.join()
+
+
+if __name__ == '__main__':
+    d = Downloader()
+    q = Queue.Queue()
+    for i in range(0, 50):
+        q.put(i)
+    d.download(q)
diff --git a/hentai/parser.py b/hentai/parser.py

index 22e5d9d6e8ceb2e79dcd2a6a8724f84c923384ce..ee029a613f0d025db96ac32c9131a766c2578291 100644 (file)
--- a/hentai/parser.py
+++ b/hentai/parser.py
@@ -1 +1,53 @@
-__author__ = 'ricter'
+import re
+import requests
+from bs4 import BeautifulSoup
+from constant import DETAIL_URL
+
+
+dojinshi_fields = ['Artists:']
+
+
+def dojinshi_parser(id):
+    """Fetch a gallery detail page and extract its basic metadata.
+
+    ``id`` may be an int or a string of digits. Returns a dict with the
+    keys 'id', 'name', 'subtitle' and 'pages'. Raises Exception when
+    ``id`` is not valid or the page has no info section.
+    """
+    # Accept ints and digit-only strings; reject everything else before
+    # the int() conversion below.
+    is_digit_string = isinstance(id, (str, )) and id.isdigit()
+    if not (isinstance(id, (int, )) or is_digit_string):
+        raise Exception('Dojinshi id(%s) is not valid' % str(id))
+    id = int(id)
+    dojinshi = dict()
+    dojinshi['id'] = id
+    url = '%s/%d/' % (DETAIL_URL, id)
+
+    response = requests.get(url).content
+    html = BeautifulSoup(response)
+    dojinshi_info = html.find('div', attrs={'id': 'info'})
+    if dojinshi_info is None:
+        # find() returns None for a missing element (e.g. an error page).
+        raise Exception('Dojinshi id(%d) info not found' % id)
+
+    title = dojinshi_info.find('h1').text
+    subtitle = dojinshi_info.find('h2')
+
+    dojinshi['name'] = title
+    dojinshi['subtitle'] = subtitle.text if subtitle else ''
+
+    # Keep the running result separate from the regex match so a page with
+    # no "<n> pages" text yields 0 instead of int(None) raising TypeError.
+    pages = 0
+    for block in dojinshi_info.find_all('div', class_=''):
+        match = re.search(r'([\d]+) pages', block.text)
+        if match:
+            pages = int(match.group(1))
+            break
+    dojinshi['pages'] = pages
+    return dojinshi
+
+
+if __name__ == '__main__':
+    print dojinshi_parser(32271)
+\ No newline at end of file
diff --git a/nhentai.py b/nhentai.py

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9f65464f31be2e4c4e6f48d79c24535df8ccdc73 100644 (file)
--- a/nhentai.py
+++ b/nhentai.py
@@ -0,0 +1,32 @@
+from hentai.cmdline import cmd_parser
+from hentai.parser import dojinshi_parser
+from hentai.dojinshi import Dojinshi
+from hentai.downloader import Downloader
+
+
+def main():
+    """Command-line entry point: show a gallery and optionally download it.
+
+    With --id the gallery is fetched and shown, and downloaded too when
+    --download is given. With no usable option the program exits quietly.
+    """
+    options = cmd_parser()
+
+    if options.id:
+        dojinshi_info = dojinshi_parser(options.id)
+        dojinshi = Dojinshi(**dojinshi_info)
+    elif options.keyword:
+        # Search is not implemented yet. Exit with a message rather than
+        # falling through and calling show() on None (AttributeError).
+        raise SystemExit('Search is not implemented yet')
+    else:
+        raise SystemExit
+
+    dojinshi.show()
+    if options.is_download:
+        dojinshi.downloader = Downloader()
+        dojinshi.download()
+
+
+if __name__ == '__main__':
+    main()
+\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..713b1e6ac100cce33bdbe00445f65f318d36cc51 100644 (file)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+requests>=2.5.0
+wget>=2.2
+BeautifulSoup4>=4.0.0
+\ No newline at end of file
author	ricterz <ricterzheng@gmail.com>
	Sun, 19 Apr 2015 05:13:55 +0000 (13:13 +0800)
committer	ricterz <ricterzheng@gmail.com>
	Sun, 19 Apr 2015 05:13:55 +0000 (13:13 +0800)
hentai/cmdline.py		patch \| blob \| history
hentai/constant.py		patch \| blob \| history
hentai/dojinshi.py		patch \| blob \| history
hentai/downloader.py		patch \| blob \| history
hentai/parser.py		patch \| blob \| history
nhentai.py		patch \| blob \| history
requirements.txt		patch \| blob \| history