2 from __future__ import unicode_literals, print_function
\r
8 from bs4 import BeautifulSoup
\r
9 from tabulate import tabulate
\r
11 import nhentai.constant as constant
\r
12 from nhentai.logger import logger
\r
def request(method, url, **kwargs):
    """Call one of the top-level ``requests`` helpers with this module's defaults.

    :param method: name of a ``requests`` module-level callable, e.g. ``'get'``
        or ``'post'``.
    :param url: URL handed to the helper as its first positional argument.
    :param kwargs: extra keyword arguments forwarded unchanged.  ``proxies``
        defaults to ``constant.PROXY`` and ``verify`` to ``False``; a value
        supplied by the caller takes precedence over either default.
    :return: whatever the selected ``requests`` helper returns.
    :raises AttributeError: if ``requests`` has no attribute named ``method``.
    """
    if not hasattr(requests, method):
        raise AttributeError('\'requests\' object has no attribute \'{0}\''.format(method))

    # setdefault (rather than hard-wired keyword arguments) lets a caller pass
    # its own ``proxies``/``verify`` without triggering a duplicate-keyword
    # TypeError.
    kwargs.setdefault('proxies', constant.PROXY)
    # NOTE(review): verify=False disables TLS certificate verification for
    # every request made through this helper. Kept because it is the existing
    # default; confirm it is intentional.
    kwargs.setdefault('verify', False)

    return getattr(requests, method)(url, **kwargs)
\r
def login_parser(username, password):
    """Log in with the given credentials and parse every favorite doujinshi.

    The function posts the login form (including its CSRF token), reads the
    favorites counter from ``constant.FAV_URL``, then walks the favorites
    pages, extracts the ``data-id`` values and runs ``doujinshi_parser`` on
    each id through a pool of 5 worker threads.

    :param username: value submitted as ``username_or_email``.
    :param password: value submitted as ``password``.
    :return: list of the results produced by ``doujinshi_parser``; an empty
        list when the account has no favorites.
    :raises Exception: if the login page contains no CSRF token input.
    :raises SystemExit: if the login is rejected or the favorites counter
        cannot be found.
    """
    s = requests.Session()
    s.proxies = constant.PROXY
    # NOTE(review): request() in this file disables certificate verification,
    # while nothing visible does so for this session - confirm whether an
    # ``s.verify = False`` line belongs here.
    s.headers.update({'Referer': constant.LOGIN_URL})

    # NOTE(review): the login page is requested twice and only the second
    # response is parsed. Kept as found; the first call looks redundant.
    s.get(constant.LOGIN_URL)
    content = s.get(constant.LOGIN_URL).content
    html = BeautifulSoup(content, 'html.parser')
    csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})

    if not csrf_token_elem:
        raise Exception('Cannot find csrf token to login')
    csrf_token = csrf_token_elem.attrs['value']

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
        'username_or_email': username,
        'password': password,
    }
    resp = s.post(constant.LOGIN_URL, data=login_dict)
    if 'Invalid username (or email) or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        # NOTE(review): the statement that followed this message was not
        # visible; stopping here is assumed, since nothing below can work
        # without a logged-in session.
        raise SystemExit(1)

    html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser')
    count_elem = html.find('span', attrs={'class': 'count'})
    if not count_elem:
        logger.error('Cannot get count of your favorites, maybe login failed.')
        # Stop instead of falling through to ``.text`` on a missing element,
        # which would raise an unrelated AttributeError.
        raise SystemExit(1)

    # The counter text is wrapped in parentheses, e.g. "(42)".
    count = int(count_elem.text.strip('(').strip(')'))
    if count == 0:
        logger.warning('No favorites found')
        # NOTE(review): the return value for this branch was not visible; an
        # empty list is assumed so callers always receive a list.
        return []

    # Ceiling division: 25 favorites per page, and count >= 1 at this point.
    # Gives the same result as the former int(count / 25) plus modulo fix-up,
    # without needing a special case for fewer than 25 favorites.
    pages = (count + 24) // 25

    logger.info('Your have %d favorites in %d pages.' % (count, pages))

    if os.getenv('DEBUG'):
        # NOTE(review): the body of this branch was not visible; limiting the
        # run to the first page is an assumption - confirm.
        pages = 1

    favorites = []
    doujinshi_id = re.compile(r'data-id="([\d]+)"')

    def _callback(_work_request, result):
        # threadpool calls this with (work request, return value) once a
        # doujinshi_parser job has finished.
        # NOTE(review): body reconstructed - collecting the result is assumed.
        favorites.append(result)

    thread_pool = threadpool.ThreadPool(5)

    for page in range(1, pages + 1):
        try:
            logger.info('Getting doujinshi id of page %d' % page)
            resp = s.get(constant.FAV_URL + '?page=%d' % page).text
            ids = doujinshi_id.findall(resp)
            requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
            for req in requests_:
                thread_pool.putRequest(req)
            # NOTE(review): waiting for the queued jobs before moving on is
            # assumed; without it the returned list could be incomplete.
            thread_pool.wait()
        except Exception as e:
            # Best effort: a failing page is reported and skipped.
            logger.error('Error: %s, continue', str(e))

    return favorites
\r
def doujinshi_parser(id_):
    """Fetch the detail record of one doujinshi and flatten it into a dict.

    :param id_: doujinshi id, either an ``int`` or text made only of digits.
    :return: dict with the keys ``id``, ``name``, ``subtitle``, ``img_id``,
        ``ext`` (the ``t`` value of every page joined into one string) and
        ``pages`` (number of pages), plus ``character``, ``artist`` and
        ``language`` when the response carries tags of those types.  Several
        tag names of the same type are joined with ``', '``.
    :raises Exception: if ``id_`` is neither an int nor digit-only text.
    :raises SystemExit: if the detail request or its JSON decoding fails.
    """
    # Accept ints and digit-only text; reject everything else up front so
    # values such as None or a list do not slip through the check.
    # ``hasattr`` is used instead of ``isinstance(id_, str)`` so text types
    # other than ``str`` are accepted as well.
    is_int = isinstance(id_, int)
    is_digit_text = not is_int and hasattr(id_, 'isdigit') and id_.isdigit()
    if not (is_int or is_digit_text):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    # NOTE(review): normalising the id to int is assumed; the statement
    # between the validation and the log call was not visible.
    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = {'id': id_}
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url).json()
    except Exception as e:
        logger.critical(str(e))
        # NOTE(review): the statement after the log call was not visible;
        # stopping is assumed because ``response`` would be unbound below.
        raise SystemExit(1)

    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    page_entries = response['images']['pages']
    doujinshi['ext'] = ''.join(entry['t'] for entry in page_entries)
    doujinshi['pages'] = len(page_entries)

    # gain information of the doujinshi
    needed_fields = ('character', 'artist', 'language')
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type not in needed_fields:
            continue
        if tag_type in doujinshi:
            # Separate multiple names instead of running them together.
            doujinshi[tag_type] += ', ' + tag['name']
        else:
            doujinshi[tag_type] = tag['name']

    return doujinshi
\r
def search_parser(keyword, page):
    """Query the search endpoint and return the matches of one result page.

    :param keyword: search text, sent as the ``query`` parameter.
    :param page: page number, sent as the ``page`` parameter.
    :return: list of ``{'id': ..., 'title': ...}`` dicts; titles longer than
        85 characters are cut to 85 characters followed by ``'..'``.  The list
        is empty when nothing matched.
    :raises Exception: if the JSON response has no ``result`` key.
    :raises SystemExit: if the connection to the search endpoint fails.
    """
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))

    try:
        response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        if 'result' not in response:
            raise Exception('No result in response')
    except requests.ConnectionError as e:
        # NOTE(review): the statements around the hint below were not visible;
        # logging the error and stopping are assumed, since ``response`` would
        # be unbound afterwards.
        logger.critical(str(e))
        # ``warning`` rather than the deprecated ``warn`` alias, matching the
        # other warning call in this file.
        logger.warning('If you are in China, please configure the proxy to fu*k GFW.')
        raise SystemExit(1)

    result = []
    for row in response['result']:
        title = row['title']['english']
        # Keep the table readable: clip long titles and mark the cut.
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warning('Not found anything of keyword {}'.format(keyword))

    return result
\r
def print_doujinshi(doujinshi_list):
    """Log the given search results as an ``rst``-formatted table.

    :param doujinshi_list: sequence of dicts that each provide an ``'id'`` and
        a ``'title'`` key.  Nothing is logged when it is empty or ``None``.
    :return: ``None``.
    """
    if not doujinshi_list:
        # NOTE(review): the guard's body was not visible; a bare return is
        # assumed (there is nothing to tabulate).
        return

    rows = [(entry['id'], entry['title']) for entry in doujinshi_list]
    headers = ['id', 'doujinshi']
    logger.info('Search Result\n' +
                tabulate(tabular_data=rows, headers=headers, tablefmt='rst'))
\r
if __name__ == '__main__':
    # Manual smoke test: parse one fixed doujinshi id and print the result.
    print(doujinshi_parser("32271"))
\r