1 use crate::{EarlyContext, EarlyLintPass, LintContext};
3 use rustc_data_structures::fx::FxHashMap;
4 use rustc_span::symbol::SymbolStr;
9 "detects non-ASCII identifiers",
// `UNCOMMON_CODEPOINTS` lint: its name followed by its human-readable description.
14 pub UNCOMMON_CODEPOINTS,
16 "detects uncommon Unicode codepoints in identifiers",
// `CONFUSABLE_IDENTS` lint: its name followed by its human-readable description.
21 pub CONFUSABLE_IDENTS,
23 "detects visually confusable pairs between identifiers",
// `MIXED_SCRIPT_CONFUSABLES` lint: its name followed by its human-readable description.
28 pub MIXED_SCRIPT_CONFUSABLES,
30 "detects Unicode scripts whose mixed script confusables codepoints are solely used",
// Declares the `NonAsciiIdents` lint pass and lists the four lints associated with it.
34 declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS, MIXED_SCRIPT_CONFUSABLES]);
// `EarlyLintPass` implementation for `NonAsciiIdents`.
36 impl EarlyLintPass for NonAsciiIdents {
// Crate-level check. It works from the symbols stored in the parse session's
// symbol gallery (reached through `cx`), not from the `ast::Crate` argument,
// which is ignored (bound to `_`).
37 fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
38 use rustc_session::lint::Level;
40 use std::collections::BTreeMap;
41 use unicode_security::GeneralSecurityProfile;
42 use utils::CowBoxSymStr;
// Each flag below is true unless the corresponding lint's level is `Allow`.
44 let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
45 let check_uncommon_codepoints =
46 cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
47 let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;
48 let check_mixed_script_confusables =
49 cx.builder.lint_level(MIXED_SCRIPT_CONFUSABLES).0 != Level::Allow;
// This condition holds only when all four lints are disabled.
51 if !check_non_ascii_idents
52 && !check_uncommon_codepoints
53 && !check_confusable_idents
54 && !check_mixed_script_confusables
// Set once an identifier with non-ASCII characters is seen; the confusable and
// mixed-script checks further down are gated on this flag.
59 let mut has_non_ascii_idents = false;
// Lock the symbol gallery's symbol table; the resulting guard is also iterated
// by the later checks.
60 let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
// Each entry pairs a symbol with the span used when reporting on it.
61 for (symbol, &sp) in symbols.iter() {
62 let symbol_str = symbol.as_str();
// Branch for identifiers made up solely of ASCII characters.
63 if symbol_str.is_ascii() {
// NOTE(review): presumably reached only for non-ASCII identifiers (the ASCII
// branch above looks like a skip) — confirm.
66 has_non_ascii_idents = true;
67 cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| {
68 lint.build("identifier contains non-ASCII characters").emit()
// Report UNCOMMON_CODEPOINTS when that lint is active and at least one character
// of the identifier is rejected by `GeneralSecurityProfile::identifier_allowed`.
70 if check_uncommon_codepoints
71 && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
73 cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| {
74 lint.build("identifier contains uncommon Unicode codepoints").emit()
// Confusable detection runs only when a non-ASCII identifier was found and the
// CONFUSABLE_IDENTS lint is active.
79 if has_non_ascii_idents && check_confusable_idents {
// Maps a skeleton to the identifier stored for it:
// (identifier text, span, whether that identifier is ASCII).
// Capacity is pre-sized to the number of symbols.
80 let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
81 FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
// Scratch string passed to `calc_skeleton` for every symbol.
82 let mut str_buf = String::new();
83 for (symbol, &sp) in symbols.iter() {
// Appends the skeleton of `symbol_str` to `buffer` and returns it as a map key.
// If the buffer equals the identifier itself, the interned string is cloned and
// used as the key. Otherwise the buffer's contents are moved out into an owned
// boxed str, leaving a fresh empty `String` in `buffer`.
84 fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
85 use std::mem::replace;
86 use unicode_security::confusable_detection::skeleton;
88 buffer.extend(skeleton(symbol_str));
89 if *symbol_str == *buffer {
90 CowBoxSymStr::Interned(symbol_str.clone())
92 let owned = replace(buffer, String::new());
93 CowBoxSymStr::Owned(owned.into_boxed_str())
96 let symbol_str = symbol.as_str();
97 let is_ascii = symbol_str.is_ascii();
98 let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
// An identifier with the same skeleton is already stored.
101 .and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
// Report the pair only if at least one of the two identifiers is non-ASCII.
102 if !*existing_is_ascii || !is_ascii {
103 cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
105 "identifier pair considered confusable between `{}` and `{}`",
106 existing_symbolstr, symbol_str
110 "this is where the previous identifier occurred",
// Stored identifier is ASCII but the new one is not: overwrite the stored text
// with the new identifier.
115 if *existing_is_ascii && !is_ascii {
116 *existing_symbolstr = symbol_str.clone();
// Make the stored ASCII flag match the new identifier.
118 *existing_is_ascii = is_ascii;
// First identifier seen with this skeleton: store it.
121 .or_insert((symbol_str, sp, is_ascii));
// Mixed-script confusable detection runs only when a non-ASCII identifier was
// found and the MIXED_SCRIPT_CONFUSABLES lint is active.
125 if has_non_ascii_idents && check_mixed_script_confusables {
126 use unicode_security::is_potential_mixed_script_confusable_char;
127 use unicode_security::mixed_script::AugmentedScriptSet;
// State tracked per script set. `Suspicious` carries a list of characters and a
// span; the other state used below is `Verified`.
130 enum ScriptSetUsage {
131 Suspicious(Vec<char>, Span),
// One state per augmented script set encountered in identifiers.
135 let mut script_states: FxHashMap<AugmentedScriptSet, ScriptSetUsage> =
136 FxHashMap::default();
// The script set of 'A' (Latin) is marked `Verified` up front.
137 let latin_augmented_script_set = AugmentedScriptSet::for_char('A');
138 script_states.insert(latin_augmented_script_set, ScriptSetUsage::Verified);
// Set to true whenever a `Suspicious` entry is created below.
140 let mut has_suspicous = false;
141 for (symbol, &sp) in symbols.iter() {
142 let symbol_str = symbol.as_str();
143 for ch in symbol_str.chars() {
145 // all ascii characters are covered by exception.
// Branch for characters rejected by `GeneralSecurityProfile::identifier_allowed`.
148 if !GeneralSecurityProfile::identifier_allowed(ch) {
149 // this character is covered by `uncommon_codepoints` lint.
// Look up (or create) the state for this character's augmented script set.
152 let augmented_script_set = AugmentedScriptSet::for_char(ch);
154 .entry(augmented_script_set)
// Script set seen before: only an entry that is still `Suspicious` is examined.
155 .and_modify(|existing_state| {
156 if let ScriptSetUsage::Suspicious(ch_list, _) = existing_state {
157 if is_potential_mixed_script_confusable_char(ch) {
// NOTE(review): presumably reached when `ch` is NOT a potential mixed-script
// confusable, promoting the script set to `Verified` — confirm.
160 *existing_state = ScriptSetUsage::Verified;
// First occurrence of this script set: `Verified` if the character is not a
// potential mixed-script confusable. Otherwise `Suspicious`, starting its
// character list with `ch` and recording the current identifier's span.
165 if !is_potential_mixed_script_confusable_char(ch) {
166 ScriptSetUsage::Verified
168 has_suspicous = true;
169 ScriptSetUsage::Suspicious(vec![ch], sp)
// Collect the script sets whose state is `Verified` (each contributes its key).
176 let verified_augmented_script_sets = script_states
178 .flat_map(|(k, v)| match v {
179 ScriptSetUsage::Verified => Some(*k),
182 .collect::<Vec<_>>();
184 // we're sorting the output here.
// Reports are gathered in a `BTreeMap` keyed by (span, character list), so the
// emission loop further down visits them in sorted key order.
185 let mut lint_reports: BTreeMap<(Span, Vec<char>), AugmentedScriptSet> =
// Consume the per-script-set states; only `Suspicious` entries can be reported.
188 'outerloop: for (augment_script_set, usage) in script_states {
189 let (mut ch_list, sp) = match usage {
190 ScriptSetUsage::Verified => continue,
191 ScriptSetUsage::Suspicious(ch_list, sp) => (ch_list, sp),
// Branch for a suspicious script set for which `is_all()` holds.
194 if augment_script_set.is_all() {
// Compare the suspicious script set against every verified one.
198 for existing in verified_augmented_script_sets.iter() {
// Branch for a verified script set for which `is_all()` holds.
199 if existing.is_all() {
// Intersect a copy of the verified set with the suspicious set.
202 let mut intersect = *existing;
203 intersect.intersect_with(augment_script_set);
// True when the intersection is neither empty nor the "all" set.
// NOTE(review): presumably such an overlap suppresses the report by continuing
// `'outerloop` — confirm.
204 if !intersect.is_empty() && !intersect.is_all() {
// Queue a report for this suspicious script set.
211 lint_reports.insert((sp, ch_list), augment_script_set);
// Emit one MIXED_SCRIPT_CONFUSABLES diagnostic per queued report.
214 for ((sp, ch_list), script_set) in lint_reports {
215 cx.struct_span_lint(MIXED_SCRIPT_CONFUSABLES, sp, |lint| {
216 let message = format!(
217 "The usage of Script Group `{}` in this crate consists solely of mixed script confusables",
// The note text starts with this prefix.
219 let mut note = "The usage includes ".to_string();
220 for (idx, ch) in ch_list.into_iter().enumerate() {
// Render each character as 'c' (U+XXXX), using at least four uppercase hex digits.
224 let char_info = format!("'{}' (U+{:04X})", ch, ch as u32);
// NOTE(review): `¬e` looks like a mis-encoded `&note` (HTML-entity damage turning
// `&not` into `¬`) — confirm against the original file and restore if so.
228 lint.build(&message).note(¬e).note("Please recheck to make sure their usages are indeed what you want.").emit()
237 use rustc_span::symbol::SymbolStr;
238 use std::hash::{Hash, Hasher};
// String key type visible to the parent module. The code in this file uses two
// variants: `Interned`, holding an interned symbol string, and `Owned`, holding
// an owned boxed str.
241 pub(super) enum CowBoxSymStr {
// Lets a `CowBoxSymStr` be used as a `&str` regardless of which variant it holds.
246 impl Deref for CowBoxSymStr {
// Returns the string contents of the `Interned` or `Owned` variant.
249 fn deref(&self) -> &str {
251 CowBoxSymStr::Interned(interned) => interned,
252 CowBoxSymStr::Owned(ref owned) => owned,
// Hashes the dereferenced value (`&**self`), not the enum itself.
257 impl Hash for CowBoxSymStr {
// Feeds the dereferenced string contents into `state`.
259 fn hash<H: Hasher>(&self, state: &mut H) {
260 Hash::hash(&**self, state)
// Equality compares the dereferenced values, matching what the `Hash` impl hashes.
264 impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
// True when both values dereference to equal contents.
266 fn eq(&self, other: &CowBoxSymStr) -> bool {
267 PartialEq::eq(&**self, &**other)
// Marker impl: equality on `CowBoxSymStr` is declared a full equivalence relation.
271 impl Eq for CowBoxSymStr {}