fix #193

[nhentai.git] / nhentai / utils.py
diff --git a/nhentai/utils.py b/nhentai/utils.py

index 1ee6e7e485f680b5697911dead207015b01e9829..099f6a12042912900743a14e20bd1dc3e077766c 100644 (file)
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -1,10 +1,8 @@
  # coding: utf-8
-from __future__ import unicode_literals, print_function
  
  import sys
  import re
  import os
-import string
  import zipfile
  import shutil
  import requests
@@ -226,6 +224,13 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
      logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir))
  
  
+def unicode_truncate(s, length, encoding='utf-8'):
+    """Encode `s`, keep the first `length` bytes, and decode them back, ignoring bytes that no longer decode.
+    Ref: https://stackoverflow.com/questions/1809531/truncating-unicode-so-it-fits-a-maximum-size-when-encoded-for-wire-transfer"""
+    encoded = s.encode(encoding)[:length]
+    return encoded.decode(encoding, 'ignore')
+
+
  def format_filename(s):
      """
      It used to be a whitelist approach allowed only alphabet and a part of symbols.
@@ -234,10 +239,16 @@ def format_filename(s):
      if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). 
      """
      # maybe you can use `--format` to select a suitable filename
-    ban_chars = '\\\'/:,;*?"<>|'
-    filename = s.translate(str.maketrans(ban_chars, ' '*len(ban_chars)))
+    ban_chars = '\\\'/:,;*?"<>|\t'
+    filename = s.translate(str.maketrans(ban_chars, ' '*len(ban_chars))).strip()
+    filename = ' '.join(filename.split())
+    print(repr(filename))
+
+    while filename.endswith('.'):
+        filename = filename[:-1]
+
      if len(filename) > 100:
-        filename = filename[:100] + '...]'
+        filename = filename[:100] + u'…'
  
      # Remove [] from filename
      filename = filename.replace('[]', '').strip()