# coding: utf-8
# Post-patch view of nhentai/utils.py (blob c28a3ee), rebuilt from the `+` and
# context lines of the diff under review; removed (`-`) lines are dropped.

import sys
import re
import os
import zipfile
import shutil
import requests
import sqlite3

from nhentai import constant
from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comicxml, set_js_database


def request(method, url, **kwargs):
    """
    Send an HTTP request through a freshly configured `requests.Session`.

    The session carries the Referer, User-Agent and Cookie headers and the
    proxy settings taken from `nhentai.constant`.

    :param method: name of the session method to call, e.g. 'get'
    :param url: target URL
    :param kwargs: forwarded unchanged to the session method
    :return: the value returned by the session method
    """
    session = requests.Session()
    session.headers.update({
        'Referer': constant.LOGIN_URL,
        'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
        'Cookie': constant.CONFIG['cookie']
    })
    # NOTE(review): verify=False disables TLS certificate verification for
    # every request made through this helper.
    return getattr(session, method)(url, proxies=constant.CONFIG['proxy'], verify=False, **kwargs)


def check_cookie():
    """
    Fetch `constant.BASE_URL` and log whether a user profile link is present.

    Logs an error when no username can be extracted from the page, otherwise
    logs the first username found.
    """
    response = request('get', constant.BASE_URL).text
    # Raw string: '\d' inside a normal literal is an invalid escape sequence.
    username = re.findall(r'"/users/\d+/(.*?)"', response)
    if not username:
        logger.error('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
    else:
        logger.info('Login successfully! Your username: {}'.format(username[0]))


# ---------------------------------------------------------------------------
# NOT RECONSTRUCTED: the diff only shows the header `class _Singleton(type):`
# and the last line of `def urlparse(url): ... return urlparse(url)`.
# Their bodies lie between the two hunks (new-file lines 36-54) and are not
# touched by the patch; keep them exactly as they are in nhentai/utils.py.
# ---------------------------------------------------------------------------


def readfile(path):
    """Return the content of `path`, resolved relative to this module's directory."""
    loc = os.path.dirname(__file__)

    with open(os.path.join(loc, path), 'r') as f:
        return f.read()


def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    """
    Write an `index.html` viewer into a doujinshi directory.

    :param output_dir: parent directory of the doujinshi folder
    :param doujinshi_obj: object providing `filename` and `name`; when None
        the current directory is used and a generic title is written
    :param template: name of the sub-folder of `viewer/` holding
        `index.html`, `styles.css` and `scripts.js`

    Failures while writing the file are logged as warnings, not raised.
    """
    image_html = ''

    if doujinshi_obj is not None:
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    else:
        doujinshi_dir = '.'

    file_list = os.listdir(doujinshi_dir)
    file_list.sort()

    for image in file_list:
        if os.path.splitext(image)[1] not in ('.jpg', '.png'):
            continue

        # NOTE(review): the markup inside this literal appears to have been
        # stripped from the copy under review (nothing references {0}).
        # Restore the image tag template from the upstream file.
        image_html += '\n'.format(image)

    html = readfile('viewer/{}/index.html'.format(template))
    css = readfile('viewer/{}/styles.css'.format(template))
    js = readfile('viewer/{}/scripts.js'.format(template))

    if doujinshi_obj is not None:
        serialize_json(doujinshi_obj, doujinshi_dir)
        name = doujinshi_obj.name
        if sys.version_info < (3, 0):
            name = doujinshi_obj.name.encode('utf-8')
    else:
        # A plain string, like the other branch: a dict here would be rendered
        # as its repr in the page title.
        name = 'nHentai HTML Viewer'

    data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
    index_path = os.path.join(doujinshi_dir, 'index.html')
    try:
        if sys.version_info < (3, 0):
            with open(index_path, 'w') as f:
                f.write(data)
        else:
            with open(index_path, 'wb') as f:
                f.write(data.encode('utf-8'))

        logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(index_path))
    except Exception as e:
        logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))


def generate_main_html(output_dir='./'):
    """
    Generate a main HTML page that lists every doujinshi folder containing an
    `index.html`, each entry linking to that file.

    The default output folder is the current working directory.

    Side effect: the process working directory is changed to `output_dir`
    and is not restored.
    """
    image_html = ''

    main = readfile('viewer/main.html')
    css = readfile('viewer/main.css')
    js = readfile('viewer/main.js')

    # NOTE(review): the markup inside this literal appears to have been
    # stripped from the copy under review (no {FOLDER}/{IMAGE}/{TITLE}
    # placeholders are left). Restore it from the upstream file.
    element = '\n \n'

    # set_js_database() below is called without arguments after this chdir;
    # presumably it works on the current directory - confirm before changing.
    os.chdir(output_dir)
    doujinshi_dirs = next(os.walk('.'))[1]

    for folder in doujinshi_dirs:
        files = os.listdir(folder)
        files.sort()

        if 'index.html' not in files:
            continue
        logger.info('Add doujinshi \'{}\''.format(folder))

        image = files[0]  # 001.jpg or 001.png
        title = folder.replace('_', ' ')

        image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)

    if image_html == '':
        logger.warning('No index.html found, --gen-main paused.')
        return

    try:
        data = main.format(STYLES=css, SCRIPTS=js, PICTURE=image_html)
        if sys.version_info < (3, 0):
            with open('./main.html', 'w') as f:
                f.write(data)
        else:
            with open('./main.html', 'wb') as f:
                f.write(data.encode('utf-8'))
        shutil.copy(os.path.join(os.path.dirname(__file__), 'viewer', 'logo.png'), './')
        set_js_database()
        # os.path.join keeps the message correct when output_dir has no
        # trailing separator.
        logger.log(
            15, 'Main Viewer has been written to \'{0}\''.format(os.path.join(output_dir, 'main.html')))
    except Exception as e:
        logger.warning('Writing Main Viewer failed ({})'.format(str(e)))


def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=False):
    """
    Pack every file of a doujinshi directory into a CBZ (zip) archive.

    :param output_dir: parent directory of the doujinshi folder
    :param doujinshi_obj: object providing `filename`; when None the current
        directory is packed into `./doujinshi.cbz`
    :param rm_origin_dir: remove the source directory after packing
    :param write_comic_info: call `serialize_comicxml` on the directory before
        packing (only when `doujinshi_obj` is given)
    """
    if doujinshi_obj is not None:
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        if write_comic_info:
            serialize_comicxml(doujinshi_obj, doujinshi_dir)
        cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename))
    else:
        cbz_filename = './doujinshi.cbz'
        doujinshi_dir = '.'

    file_list = os.listdir(doujinshi_dir)
    file_list.sort()

    logger.info('Writing CBZ file to path: {}'.format(cbz_filename))
    with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
        for image in file_list:
            image_path = os.path.join(doujinshi_dir, image)
            cbz_pf.write(image_path, image)

    # NOTE(review): with doujinshi_obj=None this removes '.', i.e. the
    # directory the archive was just written to.
    if rm_origin_dir:
        shutil.rmtree(doujinshi_dir, ignore_errors=True)

    # Report the archive that was written, not the source directory.
    logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(cbz_filename))


def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
    """
    Write the files of a doujinshi directory to a PDF file using img2pdf.

    :param output_dir: parent directory of the doujinshi folder
    :param doujinshi_obj: object providing `filename`; when None the current
        directory is converted into `./doujinshi.pdf`
    :param rm_origin_dir: remove the source directory after conversion

    If img2pdf is not installed an error is logged and nothing is written.
    """
    # Only the import is guarded, so an unrelated ImportError raised later is
    # not misreported as "img2pdf is missing".
    try:
        import img2pdf
    except ImportError:
        logger.error("Please install img2pdf package by using pip.")
        return

    if doujinshi_obj is not None:
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        pdf_filename = os.path.join(
            os.path.join(doujinshi_dir, '..'),
            '{}.pdf'.format(doujinshi_obj.filename)
        )
    else:
        pdf_filename = './doujinshi.pdf'
        doujinshi_dir = '.'

    file_list = os.listdir(doujinshi_dir)
    file_list.sort()

    logger.info('Writing PDF file to path: {}'.format(pdf_filename))
    with open(pdf_filename, 'wb') as pdf_f:
        full_path_list = [os.path.join(doujinshi_dir, image) for image in file_list]
        pdf_f.write(img2pdf.convert(full_path_list))

    if rm_origin_dir:
        shutil.rmtree(doujinshi_dir, ignore_errors=True)

    # Report the PDF that was written, not the source directory.
    logger.log(15, 'PDF file has been written to \'{0}\''.format(pdf_filename))


def unicode_truncate(s, length, encoding='utf-8'):
    """
    Truncate `s` so that its encoded form is at most `length` bytes.

    The encoded bytes are cut at `length` and decoded with errors ignored, so
    a multi-byte character split by the cut is dropped rather than corrupted.

    https://stackoverflow.com/questions/1809531/truncating-unicode-so-it-fits-a-maximum-size-when-encoded-for-wire-transfer
    """
    encoded = s.encode(encoding)[:length]
    return encoded.decode(encoding, 'ignore')


def format_filename(s):
    """
    Build a file name from `s`.

    This used to be a whitelist that kept only ASCII letters and a few
    symbols, but most doujinshi names contain Japanese characters, which were
    rejected, so a blacklist is used instead.

    Each forbidden character (\\ ' / : , ; * ? " < > | and TAB) is replaced
    with a space, runs of whitespace are collapsed, trailing dots are removed,
    names longer than 100 characters are cut and suffixed with an ellipsis,
    and empty bracket pairs `[]` are removed.
    """
    # maybe you can use `--format` to select a suitable filename
    ban_chars = '\\\'/:,;*?"<>|\t'
    filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
    filename = ' '.join(filename.split())

    filename = filename.rstrip('.')

    if len(filename) > 100:
        filename = filename[:100] + u'…'

    # Remove [] from filename
    filename = filename.replace('[]', '').strip()
    return filename


def signal_handler(signal, frame):
    """Log that Ctrl-C was received and terminate with exit status 1."""
    logger.error('Ctrl-C signal received. Stopping...')
    # sys.exit instead of the `exit` helper, which is only injected by `site`.
    sys.exit(1)


def paging(page_string):
    """
    Expand a page specification into a list of page numbers.

    '1,3-5,14' -> [1, 3, 4, 5, 14]

    :param page_string: comma separated page numbers and `start-end` ranges
    :return: list of ints; empty list for empty input
    :raises Exception: 'Invalid page number' when a part is not numeric
    """
    if not page_string:
        return []

    page_list = []
    for part in page_string.split(','):
        if '-' in part:
            # Split once: input such as '1-2-3' then fails the digit check
            # below instead of raising an unpacking ValueError.
            start, end = part.split('-', 1)
            if not (start.isdigit() and end.isdigit()):
                raise Exception('Invalid page number')
            page_list.extend(range(int(start), int(end) + 1))
        else:
            if not part.isdigit():
                raise Exception('Invalid page number')
            page_list.append(int(part))

    return page_list


class DB(object):
    """
    Context manager around the SQLite download history database.

    Entering connects to `constant.NHENTAI_HISTORY` and creates the
    `download_history` table if it does not exist; leaving closes the
    connection.
    """
    # Set in __enter__; None outside of a `with` block.
    conn = None
    cur = None

    def __enter__(self):
        self.conn = sqlite3.connect(constant.NHENTAI_HISTORY)
        self.cur = self.conn.cursor()
        self.cur.execute('CREATE TABLE IF NOT EXISTS download_history (id text)')
        self.conn.commit()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.conn.close()

    def clean_all(self):
        """Delete every row of the download history."""
        self.cur.execute('DELETE FROM download_history WHERE 1')
        self.conn.commit()

    def add_one(self, data):
        """Insert `data` as one id into the download history."""
        self.cur.execute('INSERT INTO download_history VALUES (?)', [data])
        self.conn.commit()

    def get_all(self):
        """Return all stored ids as a list."""
        data = self.cur.execute('SELECT id FROM download_history')
        return [i[0] for i in data]