nhentai/utils.py

   1 # coding: utf-8
   2
   3 import sys
   4 import re
   5 import os
   6 import zipfile
   7 import shutil
   8 import requests
   9 import sqlite3
  10
  11 from nhentai import constant
  12 from nhentai.logger import logger
  13 from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
  14
  15
  16 def request(method, url, **kwargs):
  17     session = requests.Session()
  18     session.headers.update({
  19         'Referer': constant.LOGIN_URL,
  20         'User-Agent': constant.CONFIG['useragent'],
  21         'Cookie': constant.CONFIG['cookie']
  22     })
  23
  24     if not kwargs.get('proxies', None):
  25         kwargs['proxies'] = constant.CONFIG['proxy']
  26
  27     return getattr(session, method)(url, verify=False, **kwargs)
  28
  29
  30 def check_cookie():
  31     response = request('get', constant.BASE_URL)
  32     if response.status_code == 503 and 'cf-browser-verification' in response.text:
  33         logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
  34         exit(-1)
  35
  36     username = re.findall('"/users/\d+/(.*?)"', response.text)
  37     if not username:
  38         logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
  39     else:
  40         logger.info('Login successfully! Your username: {}'.format(username[0]))
  41
  42
  43 class _Singleton(type):
  44     """ A metaclass that creates a Singleton base class when called. """
  45     _instances = {}
  46
  47     def __call__(cls, *args, **kwargs):
  48         if cls not in cls._instances:
  49             cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs)
  50         return cls._instances[cls]
  51
  52
  53 class Singleton(_Singleton(str('SingletonMeta'), (object,), {})):
  54     pass
  55
  56
  57 def urlparse(url):
  58     try:
  59         from urlparse import urlparse
  60     except ImportError:
  61         from urllib.parse import urlparse
  62
  63     return urlparse(url)
  64
  65
  66 def readfile(path):
  67     loc = os.path.dirname(__file__)
  68
  69     with open(os.path.join(loc, path), 'r') as file:
  70         return file.read()
  71
  72
  73 def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
  74     image_html = ''
  75
  76     if doujinshi_obj is not None:
  77         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
  78     else:
  79         doujinshi_dir = '.'
  80
  81     if not os.path.exists(doujinshi_dir):
  82         logger.warning('Path \'{0}\' does not exist, creating.'.format(doujinshi_dir))
  83         try:
  84             os.makedirs(doujinshi_dir)
  85         except EnvironmentError as e:
  86             logger.critical('{0}'.format(str(e)))
  87
  88     file_list = os.listdir(doujinshi_dir)
  89     file_list.sort()
  90
  91     for image in file_list:
  92         if not os.path.splitext(image)[1] in ('.jpg', '.png'):
  93             continue
  94
  95         image_html += '<img src="{0}" class="image-item"/>\n' \
  96             .format(image)
  97     html = readfile('viewer/{}/index.html'.format(template))
  98     css = readfile('viewer/{}/styles.css'.format(template))
  99     js = readfile('viewer/{}/scripts.js'.format(template))
 100
 101     if doujinshi_obj is not None:
 102         serialize_json(doujinshi_obj, doujinshi_dir)
 103         name = doujinshi_obj.name
 104         if sys.version_info < (3, 0):
 105             name = doujinshi_obj.name.encode('utf-8')
 106     else:
 107         name = {'title': 'nHentai HTML Viewer'}
 108
 109     data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
 110     try:
 111         if sys.version_info < (3, 0):
 112             with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f:
 113                 f.write(data)
 114         else:
 115             with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
 116                 f.write(data.encode('utf-8'))
 117
 118         logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
 119     except Exception as e:
 120         logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))
 121
 122
 123 def generate_main_html(output_dir='./'):
 124     """
 125     Generate a main html to show all the contain doujinshi.
 126     With a link to their `index.html`.
 127     Default output folder will be the CLI path.
 128     """
 129
 130     image_html = ''
 131
 132     main = readfile('viewer/main.html')
 133     css = readfile('viewer/main.css')
 134     js = readfile('viewer/main.js')
 135
 136     element = '\n\
 137             <div class="gallery-favorite">\n\
 138                 <div class="gallery">\n\
 139                     <a href="./{FOLDER}/index.html" class="cover" style="padding:0 0 141.6% 0"><img\n\
 140                             src="./{FOLDER}/{IMAGE}" />\n\
 141                         <div class="caption">{TITLE}</div>\n\
 142                     </a>\n\
 143                 </div>\n\
 144             </div>\n'
 145
 146     os.chdir(output_dir)
 147     doujinshi_dirs = next(os.walk('.'))[1]
 148
 149     for folder in doujinshi_dirs:
 150         files = os.listdir(folder)
 151         files.sort()
 152
 153         if 'index.html' in files:
 154             logger.info('Add doujinshi \'{}\''.format(folder))
 155         else:
 156             continue
 157
 158         image = files[0]  # 001.jpg or 001.png
 159         if folder is not None:
 160             title = folder.replace('_', ' ')
 161         else:
 162             title = 'nHentai HTML Viewer'
 163
 164         image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)
 165     if image_html == '':
 166         logger.warning('No index.html found, --gen-main paused.')
 167         return
 168     try:
 169         data = main.format(STYLES=css, SCRIPTS=js, PICTURE=image_html)
 170         if sys.version_info < (3, 0):
 171             with open('./main.html', 'w') as f:
 172                 f.write(data)
 173         else:
 174             with open('./main.html', 'wb') as f:
 175                 f.write(data.encode('utf-8'))
 176         shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
 177         set_js_database()
 178         logger.log(
 179             15, 'Main Viewer has been written to \'{0}main.html\''.format(output_dir))
 180     except Exception as e:
 181         logger.warning('Writing Main Viewer failed ({})'.format(str(e)))
 182
 183
 184 def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True):
 185     if doujinshi_obj is not None:
 186         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
 187         if write_comic_info:
 188             serialize_comic_xml(doujinshi_obj, doujinshi_dir)
 189         cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename))
 190     else:
 191         cbz_filename = './doujinshi.cbz'
 192         doujinshi_dir = '.'
 193
 194     file_list = os.listdir(doujinshi_dir)
 195     file_list.sort()
 196
 197     logger.info('Writing CBZ file to path: {}'.format(cbz_filename))
 198     with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
 199         for image in file_list:
 200             image_path = os.path.join(doujinshi_dir, image)
 201             cbz_pf.write(image_path, image)
 202
 203     if rm_origin_dir:
 204         shutil.rmtree(doujinshi_dir, ignore_errors=True)
 205
 206     logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(doujinshi_dir))
 207
 208
 209 def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
 210     try:
 211         import img2pdf
 212
 213         """Write images to a PDF file using img2pdf."""
 214         if doujinshi_obj is not None:
 215             doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
 216             pdf_filename = os.path.join(
 217                 os.path.join(doujinshi_dir, '..'),
 218                 '{}.pdf'.format(doujinshi_obj.filename)
 219             )
 220         else:
 221             pdf_filename = './doujinshi.pdf'
 222             doujinshi_dir = '.'
 223
 224         file_list = os.listdir(doujinshi_dir)
 225         file_list.sort()
 226
 227         logger.info('Writing PDF file to path: {}'.format(pdf_filename))
 228         with open(pdf_filename, 'wb') as pdf_f:
 229             full_path_list = (
 230                 [os.path.join(doujinshi_dir, image) for image in file_list]
 231             )
 232             pdf_f.write(img2pdf.convert(full_path_list))
 233
 234         if rm_origin_dir:
 235             shutil.rmtree(doujinshi_dir, ignore_errors=True)
 236
 237         logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir))
 238
 239     except ImportError:
 240         logger.error("Please install img2pdf package by using pip.")
 241
 242
 243 def unicode_truncate(s, length, encoding='utf-8'):
 244     """https://stackoverflow.com/questions/1809531/truncating-unicode-so-it-fits-a-maximum-size-when-encoded-for-wire-transfer
 245     """
 246     encoded = s.encode(encoding)[:length]
 247     return encoded.decode(encoding, 'ignore')
 248
 249
 250 def format_filename(s):
 251     """
 252     It used to be a whitelist approach allowed only alphabet and a part of symbols.
 253     but most doujinshi's names include Japanese 2-byte characters and these was rejected.
 254     so it is using blacklist approach now.
 255     if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
 256     """
 257     # maybe you can use `--format` to select a suitable filename
 258     ban_chars = '\\\'/:,;*?"<>|\t'
 259     filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
 260     filename = ' '.join(filename.split())
 261
 262     while filename.endswith('.'):
 263         filename = filename[:-1]
 264
 265     if len(filename) > 100:
 266         filename = filename[:100] + u'…'
 267
 268     # Remove [] from filename
 269     filename = filename.replace('[]', '').strip()
 270     return filename
 271
 272
 273 def signal_handler(signal, frame):
 274     logger.error('Ctrl-C signal received. Stopping...')
 275     exit(1)
 276
 277
 278 def paging(page_string):
 279     # 1,3-5,14 -> [1, 3, 4, 5, 14]
 280     if not page_string:
 281         return []
 282
 283     page_list = []
 284     for i in page_string.split(','):
 285         if '-' in i:
 286             start, end = i.split('-')
 287             if not (start.isdigit() and end.isdigit()):
 288                 raise Exception('Invalid page number')
 289             page_list.extend(list(range(int(start), int(end) + 1)))
 290         else:
 291             if not i.isdigit():
 292                 raise Exception('Invalid page number')
 293             page_list.append(int(i))
 294
 295     return page_list
 296
 297
 298 def generate_metadata_file(output_dir, table, doujinshi_obj=None):
 299     logger.info('Writing Metadata Info')
 300
 301     if doujinshi_obj is not None:
 302         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
 303     else:
 304         doujinshi_dir = '.'
 305
 306     logger.info(doujinshi_dir)
 307
 308     f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
 309
 310     fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'CIRCLE', 'SCANLATOR',
 311               'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
 312               'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
 313               'SERIES', 'PARODY', 'URL']
 314     special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR',
 315                       'LANGUAGE', 'TAGS', 'URL', 'PAGES']
 316
 317     for i in range(len(fields)):
 318         f.write('{}: '.format(fields[i]))
 319         if fields[i] in special_fields:
 320             f.write(str(table[special_fields.index(fields[i])][1]))
 321         f.write('\n')
 322
 323     f.close()
 324
 325
 326 class DB(object):
 327     conn = None
 328     cur = None
 329
 330     def __enter__(self):
 331         self.conn = sqlite3.connect(constant.NHENTAI_HISTORY)
 332         self.cur = self.conn.cursor()
 333         self.cur.execute('CREATE TABLE IF NOT EXISTS download_history (id text)')
 334         self.conn.commit()
 335         return self
 336
 337     def __exit__(self, exc_type, exc_val, exc_tb):
 338         self.conn.close()
 339
 340     def clean_all(self):
 341         self.cur.execute('DELETE FROM download_history WHERE 1')
 342         self.conn.commit()
 343
 344     def add_one(self, data):
 345         self.cur.execute('INSERT INTO download_history VALUES (?)', [data])
 346         self.conn.commit()
 347
 348     def get_all(self):
 349         data = self.cur.execute('SELECT id FROM download_history')
 350         return [i[0] for i in data]