4 from bs4 import BeautifulSoup
5 from constant import DETAIL_URL
6 from hentai.logger import logger
def dojinshi_parser(id):
    """Fetch a dojinshi detail page and extract its metadata.

    The page at ``<DETAIL_URL>/<id>/`` is downloaded and parsed for the
    title, the optional subtitle, the cover image id and extension, and the
    page count.

    :param id: numeric dojinshi id, given as an int or as a string of digits.
    :return: dict with the keys ``name``, ``subtitle``, ``img_id``, ``ext``
        and ``pages``.
    :raises Exception: if ``id`` is not a valid numeric id, if the page has
        no ``div#info`` block, or if the cover image id cannot be extracted.
    """
    # Validate before doing any work. Digit strings are converted to int so
    # that the '%d' URL formatting below works; the previous combined
    # condition rejected every string, even purely numeric ones.
    dojinshi_id = id
    if isinstance(dojinshi_id, str) and dojinshi_id.isdigit():
        dojinshi_id = int(dojinshi_id)
    if not isinstance(dojinshi_id, int):
        raise Exception('Dojinshi id(%s) is not valid' % str(id))

    logger.debug('Fetching dojinshi information')

    url = '%s/%d/' % (DETAIL_URL, dojinshi_id)
    response = requests.get(url).content
    html = BeautifulSoup(response)

    dojinshi_info = html.find('div', attrs={'id': 'info'})
    if dojinshi_info is None:
        # Without the info block nothing below can be parsed; fail with a
        # clear message instead of an AttributeError on None.
        raise Exception('Dojinshi id(%d) information not found' % dojinshi_id)

    dojinshi = {}

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')
    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    # The cover image URL carries both the gallery image id and the file
    # extension, e.g. ".../galleries/<img_id>/cover.jpg".
    dojinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/([\d]+)/cover\.(jpg|png)$',
                       dojinshi_cover.a.img['src'])
    if not img_id:
        logger.critical('Tried yo get image id failed')
        raise Exception('Dojinshi id(%d) image id not found' % dojinshi_id)
    dojinshi['img_id'] = img_id.group(1)
    dojinshi['ext'] = img_id.group(2)

    # The page count appears as "<n> pages" inside one of the class-less
    # divs of the info block; take the first match, defaulting to 0.
    pages = 0
    for field in dojinshi_info.find_all('div', class_=''):
        match = re.search(r'([\d]+) pages', field.text)
        if match:
            pages = int(match.group(1))
            break
    dojinshi['pages'] = pages

    return dojinshi
if __name__ == '__main__':
    # Manual smoke test: parse a sample id and show the result.
    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3, unlike the bare print statement.
    print(dojinshi_parser(32271))