]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Migrate `rustc_parse` to derive diagnostics
[rust.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
11
12 pub(crate) fn emit_unescape_error(
13     handler: &Handler,
14     // interior part of the literal, without quotes
15     lit: &str,
16     // full span of the literal, including quotes
17     span_with_quotes: Span,
18     // interior span of the literal, without quotes
19     span: Span,
20     mode: Mode,
21     // range of the error inside `lit`
22     range: Range<usize>,
23     error: EscapeError,
24 ) {
25     debug!(
26         "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
27         lit, span_with_quotes, mode, range, error
28     );
29     let last_char = || {
30         let c = lit[range.clone()].chars().rev().next().unwrap();
31         let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
32         (c, span)
33     };
34     match error {
35         EscapeError::LoneSurrogateUnicodeEscape => {
36             handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: true });
37         }
38         EscapeError::OutOfRangeUnicodeEscape => {
39             handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: false });
40         }
41         EscapeError::MoreThanOneChar => {
42             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
43             let mut sugg = None;
44             let mut note = None;
45
46             let lit_chars = lit.chars().collect::<Vec<_>>();
47             let (first, rest) = lit_chars.split_first().unwrap();
48             if rest.iter().copied().all(is_combining_mark) {
49                 let normalized = lit.nfc().to_string();
50                 if normalized.chars().count() == 1 {
51                     let ch = normalized.chars().next().unwrap().escape_default().to_string();
52                     sugg = Some(MoreThanOneCharSugg::NormalizedForm { span, ch, normalized });
53                 }
54                 let escaped_marks =
55                     rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
56                 note = Some(MoreThanOneCharNote::AllCombining {
57                     span,
58                     chr: format!("{first}"),
59                     len: escaped_marks.len(),
60                     escaped_marks: escaped_marks.join(""),
61                 });
62             } else {
63                 let printable: Vec<char> = lit
64                     .chars()
65                     .filter(|&x| {
66                         unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
67                             && !x.is_whitespace()
68                     })
69                     .collect();
70
71                 if let &[ch] = printable.as_slice() {
72                     sugg =
73                         Some(MoreThanOneCharSugg::RemoveNonPrinting { span, ch: ch.to_string() });
74                     note = Some(MoreThanOneCharNote::NonPrinting {
75                         span,
76                         escaped: lit.escape_default().to_string(),
77                     });
78                 }
79             };
80             let sugg = sugg.unwrap_or_else(|| {
81                 let is_byte = mode.is_byte();
82                 let prefix = if is_byte { "b" } else { "" };
83                 let mut escaped = String::with_capacity(lit.len());
84                 let mut chrs = lit.chars().peekable();
85                 while let Some(first) = chrs.next() {
86                     match (first, chrs.peek()) {
87                         ('\\', Some('"')) => {
88                             escaped.push('\\');
89                             escaped.push('"');
90                             chrs.next();
91                         }
92                         ('"', _) => {
93                             escaped.push('\\');
94                             escaped.push('"')
95                         }
96                         (c, _) => escaped.push(c),
97                     };
98                 }
99                 let sugg = format!("{prefix}\"{escaped}\"");
100                 MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
101             });
102             handler.emit_err(UnescapeError::MoreThanOneChar {
103                 span: span_with_quotes,
104                 note,
105                 suggestion: sugg,
106             });
107         }
108         EscapeError::EscapeOnlyChar => {
109             let (c, char_span) = last_char();
110             handler.emit_err(UnescapeError::EscapeOnlyChar {
111                 span,
112                 char_span,
113                 escaped_sugg: c.escape_default().to_string(),
114                 escaped_msg: escaped_char(c),
115                 byte: mode.is_byte(),
116             });
117         }
118         EscapeError::BareCarriageReturn => {
119             let double_quotes = mode.in_double_quotes();
120             handler.emit_err(UnescapeError::BareCr { span, double_quotes });
121         }
122         EscapeError::BareCarriageReturnInRawString => {
123             assert!(mode.in_double_quotes());
124             handler.emit_err(UnescapeError::BareCrRawString(span));
125         }
126         EscapeError::InvalidEscape => {
127             let (c, span) = last_char();
128
129             let label =
130                 if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
131             let ec = escaped_char(c);
132             let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
133             diag.span_label(span, label);
134             if c == '{' || c == '}' && !mode.is_byte() {
135                 diag.help(
136                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
137                 );
138             } else if c == '\r' {
139                 diag.help(
140                     "this is an isolated carriage return; consider checking your editor and \
141                      version control settings",
142                 );
143             } else {
144                 if !mode.is_byte() {
145                     diag.span_suggestion(
146                         span_with_quotes,
147                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
148                         format!("r\"{}\"", lit),
149                         Applicability::MaybeIncorrect,
150                     );
151                 }
152
153                 diag.help(
154                     "for more information, visit \
155                      <https://static.rust-lang.org/doc/master/reference.html#literals>",
156                 );
157             }
158             diag.emit();
159         }
160         EscapeError::TooShortHexEscape => {
161             handler.emit_err(UnescapeError::TooShortHexEscape(span));
162         }
163         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
164             let (c, span) = last_char();
165             let is_hex = error == EscapeError::InvalidCharInHexEscape;
166             let ch = escaped_char(c);
167             handler.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch });
168         }
169         EscapeError::NonAsciiCharInByte => {
170             let (c, span) = last_char();
171             let desc = match mode {
172                 Mode::Byte => "byte literal",
173                 Mode::ByteStr => "byte string literal",
174                 Mode::RawByteStr => "raw byte string literal",
175                 _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
176             };
177             let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
178             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
179                 format!(" but is {:?}", c)
180             } else {
181                 String::new()
182             };
183             err.span_label(span, &format!("must be ASCII{}", postfix));
184             // Note: the \\xHH suggestions are not given for raw byte string
185             // literals, because they are araw and so cannot use any escapes.
186             if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
187                 err.span_suggestion(
188                     span,
189                     &format!(
190                         "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
191                         c
192                     ),
193                     format!("\\x{:X}", c as u32),
194                     Applicability::MaybeIncorrect,
195                 );
196             } else if mode == Mode::Byte {
197                 err.span_label(span, "this multibyte character does not fit into a single byte");
198             } else if mode != Mode::RawByteStr {
199                 let mut utf8 = String::new();
200                 utf8.push(c);
201                 err.span_suggestion(
202                     span,
203                     &format!(
204                         "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
205                         c
206                     ),
207                     utf8.as_bytes()
208                         .iter()
209                         .map(|b: &u8| format!("\\x{:X}", *b))
210                         .fold("".to_string(), |a, c| a + &c),
211                     Applicability::MaybeIncorrect,
212                 );
213             }
214             err.emit();
215         }
216         EscapeError::OutOfRangeHexEscape => {
217             handler.emit_err(UnescapeError::OutOfRangeHexEscape(span));
218         }
219         EscapeError::LeadingUnderscoreUnicodeEscape => {
220             let (c, span) = last_char();
221             handler.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
222                 span,
223                 ch: escaped_char(c),
224             });
225         }
226         EscapeError::OverlongUnicodeEscape => {
227             handler.emit_err(UnescapeError::OverlongUnicodeEscape(span));
228         }
229         EscapeError::UnclosedUnicodeEscape => {
230             handler.emit_err(UnescapeError::UnclosedUnicodeEscape(span, span.shrink_to_hi()));
231         }
232         EscapeError::NoBraceInUnicodeEscape => {
233             let mut suggestion = "\\u{".to_owned();
234             let mut suggestion_len = 0;
235             let (c, char_span) = last_char();
236             let chars = once(c).chain(lit[range.end..].chars());
237             for c in chars.take(6).take_while(|c| c.is_digit(16)) {
238                 suggestion.push(c);
239                 suggestion_len += c.len_utf8();
240             }
241
242             let (label, sub) = if suggestion_len > 0 {
243                 suggestion.push('}');
244                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
245                 (None, NoBraceUnicodeSub::Suggestion { span: span.with_hi(hi), suggestion })
246             } else {
247                 (Some(span), NoBraceUnicodeSub::Help)
248             };
249             handler.emit_err(UnescapeError::NoBraceInUnicodeEscape { span, label, sub });
250         }
251         EscapeError::UnicodeEscapeInByte => {
252             handler.emit_err(UnescapeError::UnicodeEscapeInByte(span));
253         }
254         EscapeError::EmptyUnicodeEscape => {
255             handler.emit_err(UnescapeError::EmptyUnicodeEscape(span));
256         }
257         EscapeError::ZeroChars => {
258             handler.emit_err(UnescapeError::ZeroChars(span));
259         }
260         EscapeError::LoneSlash => {
261             handler.emit_err(UnescapeError::LoneSlash(span));
262         }
263         EscapeError::UnskippedWhitespaceWarning => {
264             let (c, char_span) = last_char();
265             handler.emit_warning(UnescapeError::UnskippedWhitespace {
266                 span,
267                 ch: escaped_char(c),
268                 char_span,
269             });
270         }
271         EscapeError::MultipleSkippedLinesWarning => {
272             handler.emit_warning(UnescapeError::MultipleSkippedLinesWarning(span));
273         }
274     }
275 }
276
/// Renders `c` as a `String` suitable for embedding in a diagnostic message.
///
/// Characters in the printable ASCII range (`U+0020` through `U+007E`) are returned
/// verbatim; every other character is rendered through [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    // `\`, `'` and `"` fall inside the printable range and are deliberately left
    // unescaped, since the result is shown to the user rather than re-parsed.
    if matches!(c, '\u{20}'..='\u{7e}') {
        String::from(c)
    } else {
        c.escape_default().collect()
    }
}