1 use crate::{EarlyContext, EarlyLintPass, LintContext};
3 use rustc_data_structures::fx::FxHashMap;
4 use rustc_span::symbol::{Ident, SymbolStr};
5 use std::hash::{Hash, Hasher};
11 "detects non-ASCII identifiers"
// Lint for identifiers that contain code points the description below calls
// "uncommon"; it is reported from `check_ident`.
15 pub UNCOMMON_CODEPOINTS,
17 "detects uncommon Unicode codepoints in identifiers"
// Lint for pairs of identifiers that look alike; it is reported from
// `check_crate`, which groups identifiers by their confusable skeleton.
20 // FIXME: Change this to warn.
22 pub CONFUSABLE_IDENTS,
24 "detects visually confusable pairs between identifiers"
// Declares the `NonAsciiIdents` pass and names the three lints it can emit.
27 declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
// Gives every `CowBoxSymStr` a `&str` view, so callers do not need to know
// which variant they hold.
34 impl Deref for CowBoxSymStr {
37 fn deref(&self) -> &str {
// `Interned` wraps the symbol string handed to `calc_skeleton`; it is
// borrowed as `&str` here.
39 CowBoxSymStr::Interned(interned) => interned,
// `Owned` wraps a boxed string built from the skeleton buffer.
40 CowBoxSymStr::Owned(ref owned) => owned,
// Hashes the string contents rather than the enum variant, which keeps the
// hash consistent with the content-based `PartialEq` impl.
45 impl Hash for CowBoxSymStr {
47 fn hash<H: Hasher>(&self, state: &mut H) {
// `&**self` goes through `Deref` to reach the underlying `str`.
48 Hash::hash(&**self, state)
// Two values are equal when their string contents are equal, so an `Interned`
// and an `Owned` value holding the same text compare equal.
52 impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
54 fn eq(&self, other: &CowBoxSymStr) -> bool {
// Both sides are dereferenced to `str` before comparing.
55 PartialEq::eq(&**self, &**other)
// Marker impl: equality is plain string equality (see `PartialEq` above in
// the original file), which is a total equivalence relation.
59 impl Eq for CowBoxSymStr {}
// Computes the confusable-detection skeleton of `symbol_str`.
//
// `buffer` is caller-provided scratch space that receives the skeleton's
// characters. When the skeleton is identical to the symbol itself, the symbol
// string is returned as `Interned`; otherwise the skeleton text is moved out
// of `buffer` and returned as `Owned`.
61 fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
63 use unicode_security::confusable_detection::skeleton;
// NOTE(review): the comparison below is only meaningful if `buffer` holds
// nothing but this skeleton - confirm it is cleared before this `extend`.
65 buffer.extend(skeleton(&symbol_str));
66 if symbol_str == *buffer {
// Skeleton equals the symbol: hand back the symbol string itself instead of
// building a new boxed string.
67 CowBoxSymStr::Interned(symbol_str)
// Otherwise take the buffer's contents by swapping in a fresh empty `String`,
// which leaves `buffer` empty for the caller.
69 let mut owned = String::new();
70 swap(buffer, &mut owned);
71 CowBoxSymStr::Owned(owned.into_boxed_str())
// Tests `c` against the table of code point ranges below; `check_crate` uses
// it to decide whether the fast path is still applicable.
75 fn is_in_ascii_confusable_closure(c: char) -> bool {
76 // FIXME: move this table to `unicode_security` crate.
77 // data here corresponds to Unicode 13.
// Each entry is a `(start, end)` pair; both bounds are inclusive, as the
// `>=` / `<=` comparison below shows.
78 const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
// NOTE(review): the signature takes a `char` while the table holds `u64`
// bounds, so `c` must be converted to `u64` before this loop - confirm.
80 for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
// Presumably a match on any range makes the function return `true`.
81 if c >= range_start && c <= range_end {
// Tests `c` against the list of individual code points below; on the fast
// path `check_crate` only keeps symbols containing at least one such character.
88 fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
89 // FIXME: move this table to `unicode_security` crate.
90 // data here corresponds to Unicode 13.
// Code points are stored as `u64` values.
91 const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
92 0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
// Linear scan over the list; presumably returns `true` when `c` equals an
// entry - confirm against the loop body.
96 for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
// Lint pass implementation: `check_crate` reports confusable identifier
// pairs, and `check_ident` reports non-ASCII identifiers and uncommon code
// points for the identifier it is given.
104 impl EarlyLintPass for NonAsciiIdents {
// Groups the symbols recorded in the session's symbol gallery by confusable
// skeleton and emits `CONFUSABLE_IDENTS` for every symbol whose skeleton was
// already claimed by an earlier symbol.
105 fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
106 use rustc_session::lint::Level;
// The work below only matters when the lint can fire, so the `Allow` level
// is tested first (presumably to return early).
107 if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
// Lock the gallery's symbol table for the remainder of this method.
110 let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
// Candidate list, sized for the case where every symbol is kept.
111 let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
// Fast path: as long as every symbol consists solely of characters accepted
// by `is_in_ascii_confusable_closure`, only symbols containing a character
// from the "relevant list" are kept as candidates.
112 let mut in_fast_path = true;
113 for (symbol, sp) in symbols.iter() {
115 let symbol_str = symbol.as_str();
116 if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
117 // fallback to slow path.
// Candidates gathered so far are discarded and the flag is cleared.
118 symbol_strs_and_spans.clear();
119 in_fast_path = false;
122 if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
123 symbol_strs_and_spans.push((symbol_str, *sp));
// Slow path (presumably guarded by `!in_fast_path`): every symbol becomes a
// candidate.
128 for (symbol, sp) in symbols.iter() {
129 let symbol_str = symbol.as_str();
130 symbol_strs_and_spans.push((symbol_str, *sp));
// Order the candidates by their string before pairing them up.
// NOTE(review): presumably this makes the reporting order independent of
// the symbol table's iteration order - confirm.
134 symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
// Map pre-sized to the number of candidates; the value stored per entry is
// the first `(symbol string, span)` inserted for it.
135 let mut skeleton_map =
136 FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
// Scratch buffer shared by all `calc_skeleton` calls.
137 let mut str_buf = String::new();
138 for (symbol_str, sp) in symbol_strs_and_spans {
139 let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
// Entry already present: report the current symbol at its own span, naming
// both the earlier and the current identifier.
142 .and_modify(|(existing_symbolstr, existing_span)| {
143 cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
145 "identifier pair considered confusable between `{}` and `{}`",
146 existing_symbolstr, symbol_str
// Label text for the earlier occurrence (presumably attached to
// `existing_span`).
150 "this is where the previous identifier occurred",
// Entry absent: record this symbol as the first one seen.
155 .or_insert((symbol_str, sp));
// Emits `NON_ASCII_IDENTS` and, where applicable, `UNCOMMON_CODEPOINTS` for
// `ident`, both at `ident.span`.
158 fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
159 use unicode_security::GeneralSecurityProfile;
160 let name_str = ident.name.as_str();
// Pure-ASCII identifiers need neither lint below (presumably an early
// return).
161 if name_str.is_ascii() {
164 cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
165 lint.build("identifier contains non-ASCII characters").emit()
// Additionally flag the identifier when any of its characters is rejected by
// `GeneralSecurityProfile::identifier_allowed`.
167 if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
168 cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
169 lint.build("identifier contains uncommon Unicode codepoints").emit()