]> git.lizzy.rs Git - rust.git/blob - src/librustc_lint/non_ascii_idents.rs
Rollup merge of #72153 - lcnr:exhaustively-match, r=pnkfelix
[rust.git] / src / librustc_lint / non_ascii_idents.rs
1 use crate::{EarlyContext, EarlyLintPass, LintContext};
2 use rustc_ast::ast;
3 use rustc_data_structures::fx::FxHashMap;
4 use rustc_span::symbol::{Ident, SymbolStr};
5 use std::hash::{Hash, Hasher};
6 use std::ops::Deref;
7
// Lint reported by `NonAsciiIdents::check_ident` on every identifier whose name is not pure
// ASCII. Declared with level `Allow`.
declare_lint! {
    pub NON_ASCII_IDENTS,
    Allow,
    "detects non-ASCII identifiers"
}
13
// Lint reported by `NonAsciiIdents::check_ident` on a non-ASCII identifier that contains at
// least one character rejected by `GeneralSecurityProfile::identifier_allowed`.
// Declared with level `Warn`.
declare_lint! {
    pub UNCOMMON_CODEPOINTS,
    Warn,
    "detects uncommon Unicode codepoints in identifiers"
}
19
// Lint reported by `NonAsciiIdents::check_crate` when two symbols yield the same skeleton
// (as computed by `calc_skeleton`). Declared with level `Allow`; `check_crate` returns
// immediately while the lint is at that level, so no skeletons are computed.
// FIXME: Change this to warn.
declare_lint! {
    pub CONFUSABLE_IDENTS,
    Allow,
    "detects visually confusable pairs between identifiers"
}
26
// Associates the `NonAsciiIdents` pass with the three lints it can report.
// NOTE(review): presumably this macro also defines the `NonAsciiIdents` type itself, since no
// other definition of it is visible in this file — confirm against the macro.
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
28
/// String storage for skeletons: either a symbol's own string or a separately allocated one.
///
/// `calc_skeleton` returns `Interned` when the skeleton is identical to the symbol text and
/// `Owned` otherwise. Both variants dereference to `str`, and hashing and equality are defined
/// on that string content rather than on the variant.
enum CowBoxSymStr {
    /// The symbol string handed to `calc_skeleton`, stored as-is.
    Interned(SymbolStr),
    /// Skeleton text that differs from the symbol text and therefore has its own allocation.
    Owned(Box<str>),
}
33
34 impl Deref for CowBoxSymStr {
35     type Target = str;
36
37     fn deref(&self) -> &str {
38         match self {
39             CowBoxSymStr::Interned(interned) => interned,
40             CowBoxSymStr::Owned(ref owned) => owned,
41         }
42     }
43 }
44
45 impl Hash for CowBoxSymStr {
46     #[inline]
47     fn hash<H: Hasher>(&self, state: &mut H) {
48         Hash::hash(&**self, state)
49     }
50 }
51
52 impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
53     #[inline]
54     fn eq(&self, other: &CowBoxSymStr) -> bool {
55         PartialEq::eq(&**self, &**other)
56     }
57 }
58
// Marker impl: equality is the string comparison from the `PartialEq` impl. Together with
// `Hash`, this lets `CowBoxSymStr` be used as the key of the skeleton map in `check_crate`.
impl Eq for CowBoxSymStr {}
60
61 fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
62     use std::mem::swap;
63     use unicode_security::confusable_detection::skeleton;
64     buffer.clear();
65     buffer.extend(skeleton(&symbol_str));
66     if symbol_str == *buffer {
67         CowBoxSymStr::Interned(symbol_str)
68     } else {
69         let mut owned = String::new();
70         swap(buffer, &mut owned);
71         CowBoxSymStr::Owned(owned.into_boxed_str())
72     }
73 }
74
/// Returns `true` when the code point of `c` lies inside one of the hard-coded inclusive ranges
/// below: U+0000..=U+007F, U+00BA, or U+2080.
///
/// `check_crate` uses this to decide whether every symbol qualifies for its fast path.
fn is_in_ascii_confusable_closure(c: char) -> bool {
    // FIXME: move this table to `unicode_security` crate.
    // data here corresponds to Unicode 13.
    const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
    let code_point = c as u64;
    ASCII_CONFUSABLE_CLOSURE.iter().any(|&(first, last)| first <= code_point && code_point <= last)
}
87
/// Returns `true` when the code point of `c` is one of the individually listed values below.
///
/// On `check_crate`'s fast path only symbols containing at least one of these characters are
/// kept for skeleton comparison.
// NOTE(review): presumably these are the characters inside the closure table that have a
// confusable counterpart within that same table — confirm against the Unicode 13 data.
fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
    // FIXME: move this table to `unicode_security` crate.
    // data here corresponds to Unicode 13.
    const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
        0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
        0x2080,
    ];
    ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST.contains(&(c as u64))
}
103
104 impl EarlyLintPass for NonAsciiIdents {
105     fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
106         use rustc_session::lint::Level;
107         if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
108             return;
109         }
110         let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
111         let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
112         let mut in_fast_path = true;
113         for (symbol, sp) in symbols.iter() {
114             // fast path
115             let symbol_str = symbol.as_str();
116             if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
117                 // fallback to slow path.
118                 symbol_strs_and_spans.clear();
119                 in_fast_path = false;
120                 break;
121             }
122             if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
123                 symbol_strs_and_spans.push((symbol_str, *sp));
124             }
125         }
126         if !in_fast_path {
127             // slow path
128             for (symbol, sp) in symbols.iter() {
129                 let symbol_str = symbol.as_str();
130                 symbol_strs_and_spans.push((symbol_str, *sp));
131             }
132         }
133         drop(symbols);
134         symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
135         let mut skeleton_map =
136             FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
137         let mut str_buf = String::new();
138         for (symbol_str, sp) in symbol_strs_and_spans {
139             let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
140             skeleton_map
141                 .entry(skeleton)
142                 .and_modify(|(existing_symbolstr, existing_span)| {
143                     cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
144                         lint.build(&format!(
145                             "identifier pair considered confusable between `{}` and `{}`",
146                             existing_symbolstr, symbol_str
147                         ))
148                         .span_label(
149                             *existing_span,
150                             "this is where the previous identifier occurred",
151                         )
152                         .emit();
153                     });
154                 })
155                 .or_insert((symbol_str, sp));
156         }
157     }
158     fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
159         use unicode_security::GeneralSecurityProfile;
160         let name_str = ident.name.as_str();
161         if name_str.is_ascii() {
162             return;
163         }
164         cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
165             lint.build("identifier contains non-ASCII characters").emit()
166         });
167         if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
168             cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
169                 lint.build("identifier contains uncommon Unicode codepoints").emit()
170             })
171         }
172     }
173 }