clippy_lints/src/unicode.rs

   1 use rustc::lint::*;
   2 use rustc::hir::*;
   3 use syntax::ast::LitKind;
   4 use syntax::codemap::Span;
   5 use unicode_normalization::UnicodeNormalization;
   6 use utils::{snippet, span_help_and_lint};
   7
/// **What it does:** Checks for the Unicode zero-width space (U+200B) in
/// string literals.
///
/// **Why is this bad?** Having an invisible character in the code makes for all
/// sorts of April fools, but otherwise is very much frowned upon.
///
/// **Known problems:** None.
///
/// **Example:** You don't see it, but there may be a zero-width space
/// somewhere in this text.
declare_lint! {
    pub ZERO_WIDTH_SPACE,
    Deny,
    "using a zero-width space in a string literal, which is confusing"
}
  21
/// **What it does:** Checks for non-ASCII characters in string literals.
///
/// The help message proposes the literal with every non-ASCII character
/// written as a `\u{...}` escape. If `UNICODE_NOT_NFC` is not at `Allow`
/// level, the proposal is additionally NFC-normalized before escaping.
///
/// **Why is this bad?** Yeah, we know, the 90's called and wanted their charset
/// back. Even so, there still are editors and other programs out there that
/// don't work well with Unicode. So if the code is meant to be used
/// internationally, on multiple operating systems, or has other portability
/// requirements, activating this lint could be useful.
///
/// **Known problems:** None.
///
/// **Example:**
/// ```rust
/// let x = "Hä?";
/// ```
declare_lint! {
    pub NON_ASCII_LITERAL,
    Allow,
    "using any literal non-ASCII chars in a string literal instead of \
     using the `\\u` escape"
}
  42
/// **What it does:** Checks for string literals that contain Unicode in a form
/// that is not equal to its
/// [NFC-recomposition](http://www.unicode.org/reports/tr15/#Norm_Forms).
///
/// This lint is only reported where `NON_ASCII_LITERAL` is at `Allow` level;
/// otherwise the normalization is folded into that lint's suggestion instead.
///
/// **Why is this bad?** If such a string is compared to another, the results
/// may be surprising.
///
/// **Known problems:** None.
///
/// **Example:** You may not see it, but “à” and “à” aren't the same string. The
/// former when escaped is actually `"a\u{300}"` while the latter is `"\u{e0}"`.
declare_lint! {
    pub UNICODE_NOT_NFC,
    Allow,
    "using a unicode literal not in NFC normal form (see \
     [unicode tr15](http://www.unicode.org/reports/tr15/) for further information)"
}
  60
  61
/// Lint pass that reports `ZERO_WIDTH_SPACE`, `NON_ASCII_LITERAL` and
/// `UNICODE_NOT_NFC` on string literals. It carries no state.
#[derive(Copy, Clone)]
pub struct Unicode;
  64
impl LintPass for Unicode {
    /// Lists the three lints this pass can emit.
    fn get_lints(&self) -> LintArray {
        lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC)
    }
}
  70
  71 impl<'a, 'tcx> LateLintPass<'a, 'tcx> for Unicode {
  72     fn check_expr(&mut self, cx: &LateContext<'a, 'tcx>, expr: &'tcx Expr) {
  73         if let ExprLit(ref lit) = expr.node {
  74             if let LitKind::Str(_, _) = lit.node {
  75                 check_str(cx, lit.span)
  76             }
  77         }
  78     }
  79 }
  80
  81 fn escape<T: Iterator<Item = char>>(s: T) -> String {
  82     let mut result = String::new();
  83     for c in s {
  84         if c as u32 > 0x7F {
  85             for d in c.escape_unicode() {
  86                 result.push(d)
  87             }
  88         } else {
  89             result.push(c);
  90         }
  91     }
  92     result
  93 }
  94
  95 fn check_str(cx: &LateContext, span: Span) {
  96     let string = snippet(cx, span, "");
  97     if string.contains('\u{200B}') {
  98         span_help_and_lint(cx,
  99                            ZERO_WIDTH_SPACE,
 100                            span,
 101                            "zero-width space detected",
 102                            &format!("Consider replacing the string with:\n\"{}\"",
 103                                     string.replace("\u{200B}", "\\u{200B}")));
 104     }
 105     if string.chars().any(|c| c as u32 > 0x7F) {
 106         span_help_and_lint(cx,
 107                            NON_ASCII_LITERAL,
 108                            span,
 109                            "literal non-ASCII character detected",
 110                            &format!("Consider replacing the string with:\n\"{}\"",
 111                                     if cx.current_level(UNICODE_NOT_NFC) == Level::Allow {
 112                                         escape(string.chars())
 113                                     } else {
 114                                         escape(string.nfc())
 115                                     }));
 116     }
 117     if cx.current_level(NON_ASCII_LITERAL) == Level::Allow && string.chars().zip(string.nfc()).any(|(a, b)| a != b) {
 118         span_help_and_lint(cx,
 119                            UNICODE_NOT_NFC,
 120                            span,
 121                            "non-nfc unicode sequence detected",
 122                            &format!("Consider replacing the string with:\n\"{}\"", string.nfc().collect::<String>()));
 123     }
 124 }