]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_lint/src/hidden_unicode_codepoints.rs
Rollup merge of #107656 - jonhoo:bump-rust-installer, r=Mark-Simulacrum
[rust.git] / compiler / rustc_lint / src / hidden_unicode_codepoints.rs
1 use crate::{
2     lints::{
3         HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels,
4         HiddenUnicodeCodepointsDiagSub,
5     },
6     EarlyContext, EarlyLintPass, LintContext,
7 };
8 use ast::util::unicode::{contains_text_flow_control_chars, TEXT_FLOW_CONTROL_CHARS};
9 use rustc_ast as ast;
10 use rustc_span::{BytePos, Span, Symbol};
11
declare_lint! {
    /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
    /// visual representation of text on screen in a way that does not correspond to their
    /// in-memory representation.
    ///
    /// ### Explanation
    ///
    /// The Unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
    /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
    /// its direction on software that supports these codepoints. This makes the text "abc" display
    /// as "cba" on screen. By leveraging software that supports these, people can write specially
    /// crafted literals that make the surrounding code seem like it's performing one action, when
    /// in reality it is performing another. Because of this, we proactively lint against their
    /// presence to avoid surprises.
    ///
    /// ### Example
    ///
    /// ```rust,compile_fail
    /// #![deny(text_direction_codepoint_in_literal)]
    /// fn main() {
    ///     println!("{:?}", '‮');
    /// }
    /// ```
    ///
    /// {{produces}}
    ///
    pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
    Deny,
    "detect special Unicode codepoints that affect the visual representation of text on screen, \
     changing the direction in which text flows",
}
43
// Declares the `HiddenUnicodeCodepoints` lint pass and lists
// `TEXT_DIRECTION_CODEPOINT_IN_LITERAL` as the lint it reports.
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
45
46 impl HiddenUnicodeCodepoints {
47     fn lint_text_direction_codepoint(
48         &self,
49         cx: &EarlyContext<'_>,
50         text: Symbol,
51         span: Span,
52         padding: u32,
53         point_at_inner_spans: bool,
54         label: &str,
55     ) {
56         // Obtain the `Span`s for each of the forbidden chars.
57         let spans: Vec<_> = text
58             .as_str()
59             .char_indices()
60             .filter_map(|(i, c)| {
61                 TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
62                     let lo = span.lo() + BytePos(i as u32 + padding);
63                     (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
64                 })
65             })
66             .collect();
67
68         let count = spans.len();
69         let labels = point_at_inner_spans
70             .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() });
71         let sub = if point_at_inner_spans && !spans.is_empty() {
72             HiddenUnicodeCodepointsDiagSub::Escape { spans }
73         } else {
74             HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
75         };
76
77         cx.emit_spanned_lint(
78             TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
79             span,
80             HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub },
81         );
82     }
83 }
84 impl EarlyLintPass for HiddenUnicodeCodepoints {
85     fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
86         if let ast::AttrKind::DocComment(_, comment) = attr.kind {
87             if contains_text_flow_control_chars(comment.as_str()) {
88                 self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
89             }
90         }
91     }
92
93     #[inline]
94     fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
95         // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
96         match &expr.kind {
97             ast::ExprKind::Lit(token_lit) => {
98                 let text = token_lit.symbol;
99                 if !contains_text_flow_control_chars(text.as_str()) {
100                     return;
101                 }
102                 let padding = match token_lit.kind {
103                     // account for `"` or `'`
104                     ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
105                     // account for `r###"`
106                     ast::token::LitKind::StrRaw(n) => n as u32 + 2,
107                     _ => return,
108                 };
109                 self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
110             }
111             _ => {}
112         };
113     }
114 }