git.lizzy.rs Git - rust.git/blob - src/librustc_lint/non_ascii_idents.rs
Provide a better diagnostic if ninja isn't installed
[rust.git] / src / librustc_lint / non_ascii_idents.rs
1 use crate::{EarlyContext, EarlyLintPass, LintContext};
2 use rustc_ast as ast;
3 use rustc_data_structures::fx::FxHashMap;
4 use rustc_span::symbol::Symbol;
5
// Declares the `NON_ASCII_IDENTS` lint with a default level of `Allow`.
// `check_crate` in this file reports it for every symbol whose text is not
// pure ASCII.
// NOTE(review): `crate_level_only` presumably restricts where the lint level
// may be set (crate root only) -- confirm against the `declare_lint!` macro.
6 declare_lint! {
7     pub NON_ASCII_IDENTS,
8     Allow,
9     "detects non-ASCII identifiers",
10     crate_level_only
11 }
12
// Declares the `UNCOMMON_CODEPOINTS` lint with a default level of `Warn`.
// `check_crate` in this file reports it for a non-ASCII symbol when at least
// one of its chars is rejected by `GeneralSecurityProfile::identifier_allowed`.
// NOTE(review): `crate_level_only` presumably restricts where the lint level
// may be set (crate root only) -- confirm against the `declare_lint!` macro.
13 declare_lint! {
14     pub UNCOMMON_CODEPOINTS,
15     Warn,
16     "detects uncommon Unicode codepoints in identifiers",
17     crate_level_only
18 }
19
// Declares the `CONFUSABLE_IDENTS` lint with a default level of `Warn`.
// `check_crate` in this file reports it when two symbols produce the same
// confusable-detection skeleton and at least one of the two is non-ASCII.
// NOTE(review): `crate_level_only` presumably restricts where the lint level
// may be set (crate root only) -- confirm against the `declare_lint!` macro.
20 declare_lint! {
21     pub CONFUSABLE_IDENTS,
22     Warn,
23     "detects visually confusable pairs between identifiers",
24     crate_level_only
25 }
26
// Declares the `MIXED_SCRIPT_CONFUSABLES` lint with a default level of `Warn`.
// `check_crate` in this file reports it for a script set whose every observed
// char is a potential mixed-script confusable, provided no verified script set
// overlaps it (see the filtering loop in `check_crate`).
// NOTE(review): `crate_level_only` presumably restricts where the lint level
// may be set (crate root only) -- confirm against the `declare_lint!` macro.
27 declare_lint! {
28     pub MIXED_SCRIPT_CONFUSABLES,
29     Warn,
30     "detects Unicode scripts whose mixed script confusables codepoints are solely used",
31     crate_level_only
32 }
33
// Declares the `NonAsciiIdents` lint pass and lists the four lints it covers.
34 declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS, MIXED_SCRIPT_CONFUSABLES]);
35
// `EarlyLintPass` implementation for `NonAsciiIdents`. The only hook defined
// here is `check_crate`, which inspects the symbols recorded in the session's
// `symbol_gallery` and emits the four lints declared in this file.
36 impl EarlyLintPass for NonAsciiIdents {
       // Crate-wide check, structured as three passes over the same sorted
       // symbol list:
       //   1. per-symbol: `NON_ASCII_IDENTS` and `UNCOMMON_CODEPOINTS`;
       //   2. pairwise by skeleton: `CONFUSABLE_IDENTS`;
       //   3. per script set: `MIXED_SCRIPT_CONFUSABLES`.
       // The crate AST argument is unused; all input comes from
       // `cx.sess.parse_sess.symbol_gallery.symbols`.
37     fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
38         use rustc_session::lint::Level;
39         use rustc_span::Span;
40         use std::collections::BTreeMap;
41         use unicode_security::GeneralSecurityProfile;
42
           // A lint counts as "enabled" when its current level is anything
           // other than `Allow`.
43         let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
44         let check_uncommon_codepoints =
45             cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
46         let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;
47         let check_mixed_script_confusables =
48             cx.builder.lint_level(MIXED_SCRIPT_CONFUSABLES).0 != Level::Allow;
49
           // Bail out before touching the symbol table when all four lints
           // are allowed.
50         if !check_non_ascii_idents
51             && !check_uncommon_codepoints
52             && !check_confusable_idents
53             && !check_mixed_script_confusables
54         {
55             return;
56         }
57
           // Becomes true once any non-ASCII symbol is seen; passes 2 and 3
           // are skipped entirely while it stays false.
58         let mut has_non_ascii_idents = false;
           // `symbols` is re-bound just below to a Vec of entries borrowed
           // from this lock guard.
59         let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
60
61         // Sort by `Span` so that error messages make sense with respect to the
62         // order of identifier locations in the code.
63         let mut symbols: Vec<_> = symbols.iter().collect();
64         symbols.sort_by_key(|k| k.1);
65
           // Pass 1: per-symbol lints. Pure-ASCII symbols are skipped.
66         for (symbol, &sp) in symbols.iter() {
67             let symbol_str = symbol.as_str();
68             if symbol_str.is_ascii() {
69                 continue;
70             }
71             has_non_ascii_idents = true;
               // Emitted without consulting `check_non_ascii_idents`.
               // NOTE(review): presumably `struct_span_lint` applies the
               // configured level itself -- confirm.
72             cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| {
73                 lint.build("identifier contains non-ASCII characters").emit()
74             });
               // Only when the lint is enabled, and only if some char of the
               // symbol fails `GeneralSecurityProfile::identifier_allowed`.
75             if check_uncommon_codepoints
76                 && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
77             {
78                 cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| {
79                     lint.build("identifier contains uncommon Unicode codepoints").emit()
80                 })
81             }
82         }
83
           // Pass 2: confusable pairs. Symbols are grouped by their skeleton
           // (as computed by `unicode_security::confusable_detection::skeleton`).
84         if has_non_ascii_idents && check_confusable_idents {
               // skeleton symbol -> (symbol, span, is_ascii) of the symbol
               // currently representing that skeleton.
85             let mut skeleton_map: FxHashMap<Symbol, (Symbol, Span, bool)> =
86                 FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
               // Scratch buffer, cleared and refilled for every symbol.
87             let mut skeleton_buf = String::new();
88
89             for (&symbol, &sp) in symbols.iter() {
90                 use unicode_security::confusable_detection::skeleton;
91
92                 let symbol_str = symbol.as_str();
93                 let is_ascii = symbol_str.is_ascii();
94
95                 // Get the skeleton as a `Symbol`.
96                 skeleton_buf.clear();
97                 skeleton_buf.extend(skeleton(&symbol_str));
                   // When the skeleton text equals the symbol text, reuse
                   // the symbol instead of interning the string again.
98                 let skeleton_sym = if *symbol_str == *skeleton_buf {
99                     symbol
100                 } else {
101                     Symbol::intern(&skeleton_buf)
102                 };
103
104                 skeleton_map
105                     .entry(skeleton_sym)
                        // An earlier symbol already has this skeleton.
106                     .and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
                            // Report the pair unless both symbols are ASCII.
                            // The lint is attached to the current span and
                            // labels the stored (earlier) span.
107                         if !*existing_is_ascii || !is_ascii {
108                             cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
109                                 lint.build(&format!(
110                                     "identifier pair considered confusable between `{}` and `{}`",
111                                     existing_symbol.as_str(),
112                                     symbol.as_str()
113                                 ))
114                                 .span_label(
115                                     *existing_span,
116                                     "this is where the previous identifier occurred",
117                                 )
118                                 .emit();
119                             });
120                         }
                            // If the stored representative is ASCII and the
                            // current symbol is not, the current symbol
                            // replaces it in the map.
121                         if *existing_is_ascii && !is_ascii {
122                             *existing_symbol = symbol;
123                             *existing_span = sp;
124                             *existing_is_ascii = is_ascii;
125                         }
126                     })
                        // First symbol seen with this skeleton.
127                     .or_insert((symbol, sp, is_ascii));
128             }
129         }
130
            // Pass 3: script sets whose observed chars are all potential
            // mixed-script confusables.
131         if has_non_ascii_idents && check_mixed_script_confusables {
132             use unicode_security::is_potential_mixed_script_confusable_char;
133             use unicode_security::mixed_script::AugmentedScriptSet;
134
                // State tracked per `AugmentedScriptSet`:
                // - `Suspicious(chars, span)`: every char recorded so far for
                //   this set is a potential mixed-script confusable; `span`
                //   is the span of the symbol that first introduced the set.
                // - `Verified`: some char of this set was not a potential
                //   confusable (or the set was pre-seeded below).
135             #[derive(Clone)]
136             enum ScriptSetUsage {
137                 Suspicious(Vec<char>, Span),
138                 Verified,
139             }
140
141             let mut script_states: FxHashMap<AugmentedScriptSet, ScriptSetUsage> =
142                 FxHashMap::default();
                // Pre-seed the script set of `'A'` as verified.
143             let latin_augmented_script_set = AugmentedScriptSet::for_char('A');
144             script_states.insert(latin_augmented_script_set, ScriptSetUsage::Verified);
145
                // Set when any script set is first inserted as `Suspicious`.
                // It is not cleared if that set is later marked `Verified`.
146             let mut has_suspicous = false;
147             for (symbol, &sp) in symbols.iter() {
148                 let symbol_str = symbol.as_str();
149                 for ch in symbol_str.chars() {
150                     if ch.is_ascii() {
151                         // all ascii characters are covered by exception.
152                         continue;
153                     }
154                     if !GeneralSecurityProfile::identifier_allowed(ch) {
155                         // this character is covered by `uncommon_codepoints` lint.
156                         continue;
157                     }
158                     let augmented_script_set = AugmentedScriptSet::for_char(ch);
159                     script_states
160                         .entry(augmented_script_set)
                            // Set already known: a `Verified` set is left
                            // untouched; a `Suspicious` set either records
                            // one more confusable char or becomes `Verified`.
161                         .and_modify(|existing_state| {
162                             if let ScriptSetUsage::Suspicious(ch_list, _) = existing_state {
163                                 if is_potential_mixed_script_confusable_char(ch) {
164                                     ch_list.push(ch);
165                                 } else {
166                                     *existing_state = ScriptSetUsage::Verified;
167                                 }
168                             }
169                         })
                            // First char seen for this set decides its
                            // initial state.
170                         .or_insert_with(|| {
171                             if !is_potential_mixed_script_confusable_char(ch) {
172                                 ScriptSetUsage::Verified
173                             } else {
174                                 has_suspicous = true;
175                                 ScriptSetUsage::Suspicious(vec![ch], sp)
176                             }
177                         });
178                 }
179             }
180
181             if has_suspicous {
                    // Snapshot of every set currently marked `Verified`,
                    // taken before `script_states` is consumed below.
182                 let verified_augmented_script_sets = script_states
183                     .iter()
184                     .flat_map(|(k, v)| match v {
185                         ScriptSetUsage::Verified => Some(*k),
186                         _ => None,
187                     })
188                     .collect::<Vec<_>>();
189
190                 // we're sorting the output here.
                    // The `BTreeMap` is keyed by `(Span, Vec<char>)`, so the
                    // reports are later iterated in key order rather than in
                    // hash-map order.
191                 let mut lint_reports: BTreeMap<(Span, Vec<char>), AugmentedScriptSet> =
192                     BTreeMap::new();
193
194                 'outerloop: for (augment_script_set, usage) in script_states {
195                     let (mut ch_list, sp) = match usage {
196                         ScriptSetUsage::Verified => continue,
197                         ScriptSetUsage::Suspicious(ch_list, sp) => (ch_list, sp),
198                     };
199
                        // Sets for which `is_all()` holds are never reported.
200                     if augment_script_set.is_all() {
201                         continue;
202                     }
203
                        // Drop this suspicious set if it overlaps a verified
                        // set: the intersection with some verified set (itself
                        // not `is_all()`) is neither empty nor `is_all()`.
204                     for existing in verified_augmented_script_sets.iter() {
205                         if existing.is_all() {
206                             continue;
207                         }
208                         let mut intersect = *existing;
209                         intersect.intersect_with(augment_script_set);
210                         if !intersect.is_empty() && !intersect.is_all() {
211                             continue 'outerloop;
212                         }
213                     }
214
                        // Sort and deduplicate the recorded chars before they
                        // become part of the report key.
215                     ch_list.sort();
216                     ch_list.dedup();
217                     lint_reports.insert((sp, ch_list), augment_script_set);
218                 }
219
                    // Emit one lint per remaining set. The first note lists
                    // each char as `'c' (U+XXXX)`, comma-separated.
220                 for ((sp, ch_list), script_set) in lint_reports {
221                     cx.struct_span_lint(MIXED_SCRIPT_CONFUSABLES, sp, |lint| {
222                         let message = format!(
223                             "The usage of Script Group `{}` in this crate consists solely of mixed script confusables",
224                             script_set);
225                         let mut note = "The usage includes ".to_string();
226                         for (idx, ch) in ch_list.into_iter().enumerate() {
227                             if idx != 0 {
228                                 note += ", ";
229                             }
230                             let char_info = format!("'{}' (U+{:04X})", ch, ch as u32);
231                             note += &char_info;
232                         }
233                         note += ".";
234                         lint.build(&message).note(&note).note("Please recheck to make sure their usages are indeed what you want.").emit()
235                     });
236                 }
237             }
238         }
239     }
240 }