fix #193

[nhentai.git] / nhentai / utils.py
diff --git a/nhentai/utils.py b/nhentai/utils.py

index 16f8bdf84a542cc0ab15b51eb9eb22758fcccb72..099f6a12042912900743a14e20bd1dc3e077766c 100644 (file)
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -1,17 +1,16 @@
  # coding: utf-8
-from __future__ import unicode_literals, print_function
  
  import sys
  import re
  import os
-import string
  import zipfile
  import shutil
  import requests
+import sqlite3
  
  from nhentai import constant
  from nhentai.logger import logger
-from nhentai.serializer import serialize, set_js_database
+from nhentai.serializer import serialize_json, serialize_comicxml, set_js_database
  
  
  def request(method, url, **kwargs):
@@ -19,9 +18,9 @@ def request(method, url, **kwargs):
      session.headers.update({
          'Referer': constant.LOGIN_URL,
          'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
-        'Cookie': constant.COOKIE
+        'Cookie': constant.CONFIG['cookie']
      })
-    return getattr(session, method)(url, proxies=constant.PROXY, verify=False, **kwargs)
+    return getattr(session, method)(url, proxies=constant.CONFIG['proxy'], verify=False, **kwargs)
  
  
  def check_cookie():
@@ -63,7 +62,7 @@ def readfile(path):
          return file.read()
  
  
-def generate_html(output_dir='.', doujinshi_obj=None):
+def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
      image_html = ''
  
      if doujinshi_obj is not None:
@@ -80,12 +79,12 @@ def generate_html(output_dir='.', doujinshi_obj=None):
  
          image_html += '<img src="{0}" class="image-item"/>\n'\
              .format(image)
-    html = readfile('viewer/index.html')
-    css = readfile('viewer/styles.css')
-    js = readfile('viewer/scripts.js')
+    html = readfile('viewer/{}/index.html'.format(template))
+    css = readfile('viewer/{}/styles.css'.format(template))
+    js = readfile('viewer/{}/scripts.js'.format(template))
  
      if doujinshi_obj is not None:
-        serialize(doujinshi_obj, doujinshi_dir)
+        serialize_json(doujinshi_obj, doujinshi_dir)
          name = doujinshi_obj.name
          if sys.version_info < (3, 0):
              name = doujinshi_obj.name.encode('utf-8')
@@ -101,9 +100,9 @@ def generate_html(output_dir='.', doujinshi_obj=None):
              with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
                  f.write(data.encode('utf-8'))
  
-        logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
+        logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
      except Exception as e:
-        logger.warning('Writen HTML Viewer failed ({})'.format(str(e)))
+        logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))
  
  
  def generate_main_html(output_dir='./'):
@@ -149,7 +148,7 @@ def generate_main_html(output_dir='./'):
  
          image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)
      if image_html == '':
-        logger.warning('None index.html found, --gen-main paused.')
+        logger.warning('No index.html found, --gen-main paused.')
          return
      try:
          data = main.format(STYLES=css, SCRIPTS=js, PICTURE=image_html)
@@ -162,14 +161,16 @@ def generate_main_html(output_dir='./'):
          shutil.copy(os.path.dirname(__file__)+'/viewer/logo.png', './')
          set_js_database()
          logger.log(
-            15, 'Main Viewer has been write to \'{0}main.html\''.format(output_dir))
+            15, 'Main Viewer has been written to \'{0}main.html\''.format(output_dir))
      except Exception as e:
-        logger.warning('Writen Main Viewer failed ({})'.format(str(e)))
+        logger.warning('Writing Main Viewer failed ({})'.format(str(e)))
  
  
-def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
+def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=False):
      if doujinshi_obj is not None:
          doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
+        if write_comic_info:
+            serialize_comicxml(doujinshi_obj, doujinshi_dir)
          cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename))
      else:
          cbz_filename = './doujinshi.cbz'
@@ -187,30 +188,120 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
      if rm_origin_dir:
          shutil.rmtree(doujinshi_dir, ignore_errors=True)
  
-    logger.log(15, 'Comic Book CBZ file has been write to \'{0}\''.format(doujinshi_dir))
+    logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(doujinshi_dir))
+
+
+def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
+    """Write the images of a doujinshi directory to a single PDF file using img2pdf.
+
+    :param output_dir: directory that contains the doujinshi folder
+    :param doujinshi_obj: object whose ``filename`` attribute names both the
+        image folder and the PDF; when ``None`` the current directory is
+        converted to ``./doujinshi.pdf``
+    :param rm_origin_dir: remove the image directory after the PDF is written
+    """
+    try:
+        import img2pdf
+    except ImportError:
+        # Without img2pdf nothing can be written, so stop here instead of
+        # failing later with a NameError on the missing module.
+        logger.error("Please install img2pdf package by using pip.")
+        return
+
+    if doujinshi_obj is not None:
+        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
+        pdf_filename = os.path.join(
+            os.path.join(doujinshi_dir, '..'),
+            '{}.pdf'.format(doujinshi_obj.filename)
+        )
+    else:
+        pdf_filename = './doujinshi.pdf'
+        doujinshi_dir = '.'
+
+    # Sort by name so the pages are emitted in a deterministic order.
+    file_list = sorted(os.listdir(doujinshi_dir))
+    full_path_list = [os.path.join(doujinshi_dir, image) for image in file_list]
+
+    logger.info('Writing PDF file to path: {}'.format(pdf_filename))
+    with open(pdf_filename, 'wb') as pdf_f:
+        pdf_f.write(img2pdf.convert(full_path_list))
+
+    if rm_origin_dir:
+        shutil.rmtree(doujinshi_dir, ignore_errors=True)
+
+    # Report the file that was produced, not the source directory (which may
+    # have just been removed above).
+    logger.log(15, 'PDF file has been written to \'{0}\''.format(pdf_filename))
+
+
+def unicode_truncate(s, length, encoding='utf-8'):
+    """Cut ``s`` so that its encoded form occupies at most ``length`` bytes.
+
+    The string is encoded, the byte sequence is sliced to ``length`` and the
+    result is decoded again; a multi-byte character split by the slice is
+    dropped by the ``'ignore'`` error handler instead of raising.
+
+    Approach taken from
+    https://stackoverflow.com/questions/1809531/truncating-unicode-so-it-fits-a-maximum-size-when-encoded-for-wire-transfer
+    """
+    raw_bytes = s.encode(encoding)
+    clipped = raw_bytes[:length]
+    return clipped.decode(encoding, errors='ignore')
  
  
  def format_filename(s):
-    """Take a string and return a valid filename constructed from the string.
-Uses a whitelist approach: any characters not present in valid_chars are
-removed. Also spaces are replaced with underscores.
-
-Note: this method may produce invalid filenames such as ``, `.` or `..`
-When I use this method I prepend a date string like '2009_01_15_19_46_32_'
-and append a file extension like '.txt', so I avoid the potential of using
-an invalid filename.
-
-"""
-    valid_chars = "-_.()[] %s%s" % (string.ascii_letters, string.digits)
-    filename = ''.join(c for c in s if c in valid_chars)
+    """
+    It used to be a whitelist approach allowed only alphabet and a part of symbols.
+    but most doujinshi's names include Japanese 2-byte characters and these was rejected.
+    so it is using blacklist approach now.
+    if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). 
+    """
+    # maybe you can use `--format` to select a suitable filename
+    ban_chars = '\\\'/:,;*?"<>|\t'
+    filename = s.translate(str.maketrans(ban_chars, ' '*len(ban_chars))).strip()
+    filename = ' '.join(filename.split())
+    print(repr(filename))
+
+    while filename.endswith('.'):
+        filename = filename[:-1]
+
      if len(filename) > 100:
-        filename = filename[:100] + '...]'
+        filename = filename[:100] + u'…'
  
      # Remove [] from filename
-    filename = filename.replace('[]', '')
+    filename = filename.replace('[]', '').strip()
      return filename
  
  
  def signal_handler(signal, frame):
      """Log that Ctrl-C was received, then exit with status 1.

      Neither parameter is used in the body; presumably they exist so the
      function matches the ``(signum, frame)`` handler signature expected by
      ``signal.signal`` -- the registration is not visible here, confirm
      against the caller.
      """
      logger.error('Ctrl-C signal received. Stopping...')
      exit(1)
+
+
+def paging(page_string):
+    """Expand a page specification into a list of page numbers.
+
+    Items are separated by commas; each item is either a single number or an
+    inclusive ``start-end`` range, e.g. ``'1,3-5,14' -> [1, 3, 4, 5, 14]``.
+
+    :param page_string: the specification; a falsy value yields ``[]``
+    :return: list of ints in the order given
+    :raises Exception: ``'Invalid page number'`` if any item is malformed
+    """
+    if not page_string:
+        return []
+
+    page_list = []
+    for item in page_string.split(','):
+        # partition() splits on the first '-' only, so an item with several
+        # dashes such as '1-2-3' fails the digit check below instead of
+        # raising an unrelated unpacking ValueError.
+        start, dash, end = item.partition('-')
+        if dash:
+            if not (start.isdigit() and end.isdigit()):
+                raise Exception('Invalid page number')
+            page_list.extend(range(int(start), int(end) + 1))
+        else:
+            if not item.isdigit():
+                raise Exception('Invalid page number')
+            page_list.append(int(item))
+
+    return page_list
+
+
+class DB(object):
+    """Context manager around the SQLite download-history database.
+
+    Entering opens the database at ``constant.NHENTAI_HISTORY`` and makes sure
+    the ``download_history`` table exists; leaving closes the connection.
+    """
+
+    # Both are populated by __enter__; they stay None until then.
+    conn = None
+    cur = None
+
+    def __enter__(self):
+        connection = sqlite3.connect(constant.NHENTAI_HISTORY)
+        self.conn = connection
+        self.cur = connection.cursor()
+        self.cur.execute('CREATE TABLE IF NOT EXISTS download_history (id text)')
+        connection.commit()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Returns None, so an exception raised inside the with-block propagates.
+        self.conn.close()
+
+    def clean_all(self):
+        """Delete every row of the history table and commit."""
+        self.cur.execute('DELETE FROM download_history WHERE 1')
+        self.conn.commit()
+
+    def add_one(self, data):
+        """Insert ``data`` as a new ``id`` row and commit."""
+        self.cur.execute('INSERT INTO download_history VALUES (?)', (data,))
+        self.conn.commit()
+
+    def get_all(self):
+        """Return the stored ids as a list."""
+        rows = self.cur.execute('SELECT id FROM download_history').fetchall()
+        return [row[0] for row in rows]