git.lizzy.rs Git - rust.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Rollup merge of #97963 - devnexen:net_listener_neg, r=the8472
[rust.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{pluralize, Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 pub(crate) fn emit_unescape_error(
11     handler: &Handler,
12     // interior part of the literal, without quotes
13     lit: &str,
14     // full span of the literal, including quotes
15     span_with_quotes: Span,
16     // interior span of the literal, without quotes
17     span: Span,
18     mode: Mode,
19     // range of the error inside `lit`
20     range: Range<usize>,
21     error: EscapeError,
22 ) {
23     tracing::debug!(
24         "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
25         lit,
26         span_with_quotes,
27         mode,
28         range,
29         error
30     );
31     let last_char = || {
32         let c = lit[range.clone()].chars().rev().next().unwrap();
33         let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
34         (c, span)
35     };
36     match error {
37         EscapeError::LoneSurrogateUnicodeEscape => {
38             handler
39                 .struct_span_err(span, "invalid unicode character escape")
40                 .span_label(span, "invalid escape")
41                 .help("unicode escape must not be a surrogate")
42                 .emit();
43         }
44         EscapeError::OutOfRangeUnicodeEscape => {
45             handler
46                 .struct_span_err(span, "invalid unicode character escape")
47                 .span_label(span, "invalid escape")
48                 .help("unicode escape must be at most 10FFFF")
49                 .emit();
50         }
51         EscapeError::MoreThanOneChar => {
52             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
53
54             let mut has_help = false;
55             let mut handler = handler.struct_span_err(
56                 span_with_quotes,
57                 "character literal may only contain one codepoint",
58             );
59
60             if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
61                 let escaped_marks =
62                     lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
63                 handler.span_note(
64                     span,
65                     &format!(
66                         "this `{}` is followed by the combining mark{} `{}`",
67                         lit.chars().next().unwrap(),
68                         pluralize!(escaped_marks.len()),
69                         escaped_marks.join(""),
70                     ),
71                 );
72                 let normalized = lit.nfc().to_string();
73                 if normalized.chars().count() == 1 {
74                     has_help = true;
75                     handler.span_suggestion(
76                         span,
77                         &format!(
78                             "consider using the normalized form `{}` of this character",
79                             normalized.chars().next().unwrap().escape_default()
80                         ),
81                         normalized,
82                         Applicability::MachineApplicable,
83                     );
84                 }
85             } else {
86                 let printable: Vec<char> = lit
87                     .chars()
88                     .filter(|&x| {
89                         unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
90                             && !x.is_whitespace()
91                     })
92                     .collect();
93
94                 if let [ch] = printable.as_slice() {
95                     has_help = true;
96
97                     handler.span_note(
98                         span,
99                         &format!(
100                             "there are non-printing characters, the full sequence is `{}`",
101                             lit.escape_default(),
102                         ),
103                     );
104
105                     handler.span_suggestion(
106                         span,
107                         "consider removing the non-printing characters",
108                         ch,
109                         Applicability::MaybeIncorrect,
110                     );
111                 }
112             }
113
114             if !has_help {
115                 let (prefix, msg) = if mode.is_bytes() {
116                     ("b", "if you meant to write a byte string literal, use double quotes")
117                 } else {
118                     ("", "if you meant to write a `str` literal, use double quotes")
119                 };
120
121                 handler.span_suggestion(
122                     span_with_quotes,
123                     msg,
124                     format!("{}\"{}\"", prefix, lit),
125                     Applicability::MachineApplicable,
126                 );
127             }
128
129             handler.emit();
130         }
131         EscapeError::EscapeOnlyChar => {
132             let (c, char_span) = last_char();
133
134             let msg = if mode.is_bytes() {
135                 "byte constant must be escaped"
136             } else {
137                 "character constant must be escaped"
138             };
139             handler
140                 .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
141                 .span_suggestion(
142                     char_span,
143                     "escape the character",
144                     c.escape_default(),
145                     Applicability::MachineApplicable,
146                 )
147                 .emit();
148         }
149         EscapeError::BareCarriageReturn => {
150             let msg = if mode.in_double_quotes() {
151                 "bare CR not allowed in string, use `\\r` instead"
152             } else {
153                 "character constant must be escaped: `\\r`"
154             };
155             handler
156                 .struct_span_err(span, msg)
157                 .span_suggestion(
158                     span,
159                     "escape the character",
160                     "\\r",
161                     Applicability::MachineApplicable,
162                 )
163                 .emit();
164         }
165         EscapeError::BareCarriageReturnInRawString => {
166             assert!(mode.in_double_quotes());
167             let msg = "bare CR not allowed in raw string";
168             handler.span_err(span, msg);
169         }
170         EscapeError::InvalidEscape => {
171             let (c, span) = last_char();
172
173             let label =
174                 if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
175             let ec = escaped_char(c);
176             let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
177             diag.span_label(span, label);
178             if c == '{' || c == '}' && !mode.is_bytes() {
179                 diag.help(
180                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
181                 );
182             } else if c == '\r' {
183                 diag.help(
184                     "this is an isolated carriage return; consider checking your editor and \
185                      version control settings",
186                 );
187             } else {
188                 if !mode.is_bytes() {
189                     diag.span_suggestion(
190                         span_with_quotes,
191                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
192                         format!("r\"{}\"", lit),
193                         Applicability::MaybeIncorrect,
194                     );
195                 }
196
197                 diag.help(
198                     "for more information, visit \
199                      <https://static.rust-lang.org/doc/master/reference.html#literals>",
200                 );
201             }
202             diag.emit();
203         }
204         EscapeError::TooShortHexEscape => {
205             handler.span_err(span, "numeric character escape is too short");
206         }
207         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
208             let (c, span) = last_char();
209
210             let msg = if error == EscapeError::InvalidCharInHexEscape {
211                 "invalid character in numeric character escape"
212             } else {
213                 "invalid character in unicode escape"
214             };
215             let c = escaped_char(c);
216
217             handler
218                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
219                 .span_label(span, msg)
220                 .emit();
221         }
222         EscapeError::NonAsciiCharInByte => {
223             assert!(mode.is_bytes());
224             let (c, span) = last_char();
225             let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
226             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
227                 format!(" but is {:?}", c)
228             } else {
229                 String::new()
230             };
231             err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
232             if (c as u32) <= 0xFF {
233                 err.span_suggestion(
234                     span,
235                     &format!(
236                         "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
237                         c
238                     ),
239                     format!("\\x{:X}", c as u32),
240                     Applicability::MaybeIncorrect,
241                 );
242             } else if matches!(mode, Mode::Byte) {
243                 err.span_label(span, "this multibyte character does not fit into a single byte");
244             } else if matches!(mode, Mode::ByteStr) {
245                 let mut utf8 = String::new();
246                 utf8.push(c);
247                 err.span_suggestion(
248                     span,
249                     &format!(
250                         "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
251                         c
252                     ),
253                     utf8.as_bytes()
254                         .iter()
255                         .map(|b: &u8| format!("\\x{:X}", *b))
256                         .fold("".to_string(), |a, c| a + &c),
257                     Applicability::MaybeIncorrect,
258                 );
259             }
260             err.emit();
261         }
262         EscapeError::NonAsciiCharInByteString => {
263             assert!(mode.is_bytes());
264             let (c, span) = last_char();
265             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
266                 format!(" but is {:?}", c)
267             } else {
268                 String::new()
269             };
270             handler
271                 .struct_span_err(span, "raw byte string must be ASCII")
272                 .span_label(span, &format!("must be ASCII{}", postfix))
273                 .emit();
274         }
275         EscapeError::OutOfRangeHexEscape => {
276             handler
277                 .struct_span_err(span, "out of range hex escape")
278                 .span_label(span, "must be a character in the range [\\x00-\\x7f]")
279                 .emit();
280         }
281         EscapeError::LeadingUnderscoreUnicodeEscape => {
282             let (c, span) = last_char();
283             let msg = "invalid start of unicode escape";
284             handler
285                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
286                 .span_label(span, msg)
287                 .emit();
288         }
289         EscapeError::OverlongUnicodeEscape => {
290             handler
291                 .struct_span_err(span, "overlong unicode escape")
292                 .span_label(span, "must have at most 6 hex digits")
293                 .emit();
294         }
295         EscapeError::UnclosedUnicodeEscape => {
296             handler
297                 .struct_span_err(span, "unterminated unicode escape")
298                 .span_label(span, "missing a closing `}`")
299                 .span_suggestion_verbose(
300                     span.shrink_to_hi(),
301                     "terminate the unicode escape",
302                     "}",
303                     Applicability::MaybeIncorrect,
304                 )
305                 .emit();
306         }
307         EscapeError::NoBraceInUnicodeEscape => {
308             let msg = "incorrect unicode escape sequence";
309             let mut diag = handler.struct_span_err(span, msg);
310
311             let mut suggestion = "\\u{".to_owned();
312             let mut suggestion_len = 0;
313             let (c, char_span) = last_char();
314             let chars = once(c).chain(lit[range.end..].chars());
315             for c in chars.take(6).take_while(|c| c.is_digit(16)) {
316                 suggestion.push(c);
317                 suggestion_len += c.len_utf8();
318             }
319
320             if suggestion_len > 0 {
321                 suggestion.push('}');
322                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
323                 diag.span_suggestion(
324                     span.with_hi(hi),
325                     "format of unicode escape sequences uses braces",
326                     suggestion,
327                     Applicability::MaybeIncorrect,
328                 );
329             } else {
330                 diag.span_label(span, msg);
331                 diag.help("format of unicode escape sequences is `\\u{...}`");
332             }
333
334             diag.emit();
335         }
336         EscapeError::UnicodeEscapeInByte => {
337             let msg = "unicode escape in byte string";
338             handler
339                 .struct_span_err(span, msg)
340                 .span_label(span, msg)
341                 .help("unicode escape sequences cannot be used as a byte or in a byte string")
342                 .emit();
343         }
344         EscapeError::EmptyUnicodeEscape => {
345             handler
346                 .struct_span_err(span, "empty unicode escape")
347                 .span_label(span, "this escape must have at least 1 hex digit")
348                 .emit();
349         }
350         EscapeError::ZeroChars => {
351             let msg = "empty character literal";
352             handler.struct_span_err(span, msg).span_label(span, msg).emit();
353         }
354         EscapeError::LoneSlash => {
355             let msg = "invalid trailing slash in literal";
356             handler.struct_span_err(span, msg).span_label(span, msg).emit();
357         }
358         EscapeError::UnskippedWhitespaceWarning => {
359             let (c, char_span) = last_char();
360             let msg =
361                 format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
362             handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
363         }
364         EscapeError::MultipleSkippedLinesWarning => {
365             let msg = "multiple lines skipped by escaped newline";
366             let bottom_msg = "skipping everything up to and including this point";
367             handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
368         }
369     }
370 }
371
/// Renders `c` as a string suitable for embedding in a user-facing diagnostic message.
///
/// Printable ASCII (`U+0020` through `U+007E`) is returned as-is. That deliberately includes
/// `\`, `'` and `"`, which `char::escape_default` would backslash-escape. Any other
/// character is rendered with `char::escape_default`.
pub(crate) fn escaped_char(c: char) -> String {
    if matches!(c, ' '..='~') {
        String::from(c)
    } else {
        c.escape_default().collect()
    }
}
381 }