]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Auto merge of #97802 - Enselic:add-no_ignore_sigkill-feature, r=joshtriplett
[rust.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{pluralize, Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 pub(crate) fn emit_unescape_error(
11     handler: &Handler,
12     // interior part of the literal, without quotes
13     lit: &str,
14     // full span of the literal, including quotes
15     span_with_quotes: Span,
16     // interior span of the literal, without quotes
17     span: Span,
18     mode: Mode,
19     // range of the error inside `lit`
20     range: Range<usize>,
21     error: EscapeError,
22 ) {
23     debug!(
24         "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
25         lit, span_with_quotes, mode, range, error
26     );
27     let last_char = || {
28         let c = lit[range.clone()].chars().rev().next().unwrap();
29         let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
30         (c, span)
31     };
32     match error {
33         EscapeError::LoneSurrogateUnicodeEscape => {
34             handler
35                 .struct_span_err(span, "invalid unicode character escape")
36                 .span_label(span, "invalid escape")
37                 .help("unicode escape must not be a surrogate")
38                 .emit();
39         }
40         EscapeError::OutOfRangeUnicodeEscape => {
41             handler
42                 .struct_span_err(span, "invalid unicode character escape")
43                 .span_label(span, "invalid escape")
44                 .help("unicode escape must be at most 10FFFF")
45                 .emit();
46         }
47         EscapeError::MoreThanOneChar => {
48             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
49
50             let mut has_help = false;
51             let mut handler = handler.struct_span_err(
52                 span_with_quotes,
53                 "character literal may only contain one codepoint",
54             );
55
56             if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
57                 let escaped_marks =
58                     lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
59                 handler.span_note(
60                     span,
61                     &format!(
62                         "this `{}` is followed by the combining mark{} `{}`",
63                         lit.chars().next().unwrap(),
64                         pluralize!(escaped_marks.len()),
65                         escaped_marks.join(""),
66                     ),
67                 );
68                 let normalized = lit.nfc().to_string();
69                 if normalized.chars().count() == 1 {
70                     has_help = true;
71                     handler.span_suggestion(
72                         span,
73                         &format!(
74                             "consider using the normalized form `{}` of this character",
75                             normalized.chars().next().unwrap().escape_default()
76                         ),
77                         normalized,
78                         Applicability::MachineApplicable,
79                     );
80                 }
81             } else {
82                 let printable: Vec<char> = lit
83                     .chars()
84                     .filter(|&x| {
85                         unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
86                             && !x.is_whitespace()
87                     })
88                     .collect();
89
90                 if let [ch] = printable.as_slice() {
91                     has_help = true;
92
93                     handler.span_note(
94                         span,
95                         &format!(
96                             "there are non-printing characters, the full sequence is `{}`",
97                             lit.escape_default(),
98                         ),
99                     );
100
101                     handler.span_suggestion(
102                         span,
103                         "consider removing the non-printing characters",
104                         ch,
105                         Applicability::MaybeIncorrect,
106                     );
107                 }
108             }
109
110             if !has_help {
111                 let (prefix, msg) = if mode.is_bytes() {
112                     ("b", "if you meant to write a byte string literal, use double quotes")
113                 } else {
114                     ("", "if you meant to write a `str` literal, use double quotes")
115                 };
116
117                 handler.span_suggestion(
118                     span_with_quotes,
119                     msg,
120                     format!("{}\"{}\"", prefix, lit),
121                     Applicability::MachineApplicable,
122                 );
123             }
124
125             handler.emit();
126         }
127         EscapeError::EscapeOnlyChar => {
128             let (c, char_span) = last_char();
129
130             let msg = if mode.is_bytes() {
131                 "byte constant must be escaped"
132             } else {
133                 "character constant must be escaped"
134             };
135             handler
136                 .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
137                 .span_suggestion(
138                     char_span,
139                     "escape the character",
140                     c.escape_default(),
141                     Applicability::MachineApplicable,
142                 )
143                 .emit();
144         }
145         EscapeError::BareCarriageReturn => {
146             let msg = if mode.in_double_quotes() {
147                 "bare CR not allowed in string, use `\\r` instead"
148             } else {
149                 "character constant must be escaped: `\\r`"
150             };
151             handler
152                 .struct_span_err(span, msg)
153                 .span_suggestion(
154                     span,
155                     "escape the character",
156                     "\\r",
157                     Applicability::MachineApplicable,
158                 )
159                 .emit();
160         }
161         EscapeError::BareCarriageReturnInRawString => {
162             assert!(mode.in_double_quotes());
163             let msg = "bare CR not allowed in raw string";
164             handler.span_err(span, msg);
165         }
166         EscapeError::InvalidEscape => {
167             let (c, span) = last_char();
168
169             let label =
170                 if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
171             let ec = escaped_char(c);
172             let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
173             diag.span_label(span, label);
174             if c == '{' || c == '}' && !mode.is_bytes() {
175                 diag.help(
176                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
177                 );
178             } else if c == '\r' {
179                 diag.help(
180                     "this is an isolated carriage return; consider checking your editor and \
181                      version control settings",
182                 );
183             } else {
184                 if !mode.is_bytes() {
185                     diag.span_suggestion(
186                         span_with_quotes,
187                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
188                         format!("r\"{}\"", lit),
189                         Applicability::MaybeIncorrect,
190                     );
191                 }
192
193                 diag.help(
194                     "for more information, visit \
195                      <https://static.rust-lang.org/doc/master/reference.html#literals>",
196                 );
197             }
198             diag.emit();
199         }
200         EscapeError::TooShortHexEscape => {
201             handler.span_err(span, "numeric character escape is too short");
202         }
203         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
204             let (c, span) = last_char();
205
206             let msg = if error == EscapeError::InvalidCharInHexEscape {
207                 "invalid character in numeric character escape"
208             } else {
209                 "invalid character in unicode escape"
210             };
211             let c = escaped_char(c);
212
213             handler
214                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
215                 .span_label(span, msg)
216                 .emit();
217         }
218         EscapeError::NonAsciiCharInByte => {
219             assert!(mode.is_bytes());
220             let (c, span) = last_char();
221             let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
222             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
223                 format!(" but is {:?}", c)
224             } else {
225                 String::new()
226             };
227             err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
228             if (c as u32) <= 0xFF {
229                 err.span_suggestion(
230                     span,
231                     &format!(
232                         "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
233                         c
234                     ),
235                     format!("\\x{:X}", c as u32),
236                     Applicability::MaybeIncorrect,
237                 );
238             } else if matches!(mode, Mode::Byte) {
239                 err.span_label(span, "this multibyte character does not fit into a single byte");
240             } else if matches!(mode, Mode::ByteStr) {
241                 let mut utf8 = String::new();
242                 utf8.push(c);
243                 err.span_suggestion(
244                     span,
245                     &format!(
246                         "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
247                         c
248                     ),
249                     utf8.as_bytes()
250                         .iter()
251                         .map(|b: &u8| format!("\\x{:X}", *b))
252                         .fold("".to_string(), |a, c| a + &c),
253                     Applicability::MaybeIncorrect,
254                 );
255             }
256             err.emit();
257         }
258         EscapeError::NonAsciiCharInByteString => {
259             assert!(mode.is_bytes());
260             let (c, span) = last_char();
261             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
262                 format!(" but is {:?}", c)
263             } else {
264                 String::new()
265             };
266             handler
267                 .struct_span_err(span, "raw byte string must be ASCII")
268                 .span_label(span, &format!("must be ASCII{}", postfix))
269                 .emit();
270         }
271         EscapeError::OutOfRangeHexEscape => {
272             handler
273                 .struct_span_err(span, "out of range hex escape")
274                 .span_label(span, "must be a character in the range [\\x00-\\x7f]")
275                 .emit();
276         }
277         EscapeError::LeadingUnderscoreUnicodeEscape => {
278             let (c, span) = last_char();
279             let msg = "invalid start of unicode escape";
280             handler
281                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
282                 .span_label(span, msg)
283                 .emit();
284         }
285         EscapeError::OverlongUnicodeEscape => {
286             handler
287                 .struct_span_err(span, "overlong unicode escape")
288                 .span_label(span, "must have at most 6 hex digits")
289                 .emit();
290         }
291         EscapeError::UnclosedUnicodeEscape => {
292             handler
293                 .struct_span_err(span, "unterminated unicode escape")
294                 .span_label(span, "missing a closing `}`")
295                 .span_suggestion_verbose(
296                     span.shrink_to_hi(),
297                     "terminate the unicode escape",
298                     "}",
299                     Applicability::MaybeIncorrect,
300                 )
301                 .emit();
302         }
303         EscapeError::NoBraceInUnicodeEscape => {
304             let msg = "incorrect unicode escape sequence";
305             let mut diag = handler.struct_span_err(span, msg);
306
307             let mut suggestion = "\\u{".to_owned();
308             let mut suggestion_len = 0;
309             let (c, char_span) = last_char();
310             let chars = once(c).chain(lit[range.end..].chars());
311             for c in chars.take(6).take_while(|c| c.is_digit(16)) {
312                 suggestion.push(c);
313                 suggestion_len += c.len_utf8();
314             }
315
316             if suggestion_len > 0 {
317                 suggestion.push('}');
318                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
319                 diag.span_suggestion(
320                     span.with_hi(hi),
321                     "format of unicode escape sequences uses braces",
322                     suggestion,
323                     Applicability::MaybeIncorrect,
324                 );
325             } else {
326                 diag.span_label(span, msg);
327                 diag.help("format of unicode escape sequences is `\\u{...}`");
328             }
329
330             diag.emit();
331         }
332         EscapeError::UnicodeEscapeInByte => {
333             let msg = "unicode escape in byte string";
334             handler
335                 .struct_span_err(span, msg)
336                 .span_label(span, msg)
337                 .help("unicode escape sequences cannot be used as a byte or in a byte string")
338                 .emit();
339         }
340         EscapeError::EmptyUnicodeEscape => {
341             handler
342                 .struct_span_err(span, "empty unicode escape")
343                 .span_label(span, "this escape must have at least 1 hex digit")
344                 .emit();
345         }
346         EscapeError::ZeroChars => {
347             let msg = "empty character literal";
348             handler.struct_span_err(span, msg).span_label(span, msg).emit();
349         }
350         EscapeError::LoneSlash => {
351             let msg = "invalid trailing slash in literal";
352             handler.struct_span_err(span, msg).span_label(span, msg).emit();
353         }
354         EscapeError::UnskippedWhitespaceWarning => {
355             let (c, char_span) = last_char();
356             let msg =
357                 format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
358             handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
359         }
360         EscapeError::MultipleSkippedLinesWarning => {
361             let msg = "multiple lines skipped by escaped newline";
362             let bottom_msg = "skipping everything up to and including this point";
363             handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
364         }
365     }
366 }
367
/// Renders `c` for inclusion in a user-facing diagnostic message.
///
/// Printable ASCII (`U+0020` through `U+007E`) is returned verbatim, so `\`, `'`
/// and `"` appear unescaped; any other character is rendered with
/// [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    let is_printable_ascii = ('\u{20}'..='\u{7e}').contains(&c);
    if is_printable_ascii {
        // Keep `\`, `'` and `"` readable rather than escaping them.
        String::from(c)
    } else {
        c.escape_default().collect()
    }
}
377 }