]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
6373f5b4fd6ff36f32f99b14bd80ec9326663dfc
[rust.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{pluralize, Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 pub(crate) fn emit_unescape_error(
11     handler: &Handler,
12     // interior part of the literal, without quotes
13     lit: &str,
14     // full span of the literal, including quotes
15     span_with_quotes: Span,
16     // interior span of the literal, without quotes
17     span: Span,
18     mode: Mode,
19     // range of the error inside `lit`
20     range: Range<usize>,
21     error: EscapeError,
22 ) {
23     debug!(
24         "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
25         lit, span_with_quotes, mode, range, error
26     );
27     let last_char = || {
28         let c = lit[range.clone()].chars().rev().next().unwrap();
29         let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
30         (c, span)
31     };
32     match error {
33         EscapeError::LoneSurrogateUnicodeEscape => {
34             handler
35                 .struct_span_err(span, "invalid unicode character escape")
36                 .span_label(span, "invalid escape")
37                 .help("unicode escape must not be a surrogate")
38                 .emit();
39         }
40         EscapeError::OutOfRangeUnicodeEscape => {
41             handler
42                 .struct_span_err(span, "invalid unicode character escape")
43                 .span_label(span, "invalid escape")
44                 .help("unicode escape must be at most 10FFFF")
45                 .emit();
46         }
47         EscapeError::MoreThanOneChar => {
48             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
49
50             let mut has_help = false;
51             let mut handler = handler.struct_span_err(
52                 span_with_quotes,
53                 "character literal may only contain one codepoint",
54             );
55
56             if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
57                 let escaped_marks =
58                     lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
59                 handler.span_note(
60                     span,
61                     &format!(
62                         "this `{}` is followed by the combining mark{} `{}`",
63                         lit.chars().next().unwrap(),
64                         pluralize!(escaped_marks.len()),
65                         escaped_marks.join(""),
66                     ),
67                 );
68                 let normalized = lit.nfc().to_string();
69                 if normalized.chars().count() == 1 {
70                     has_help = true;
71                     handler.span_suggestion(
72                         span,
73                         &format!(
74                             "consider using the normalized form `{}` of this character",
75                             normalized.chars().next().unwrap().escape_default()
76                         ),
77                         normalized,
78                         Applicability::MachineApplicable,
79                     );
80                 }
81             } else {
82                 let printable: Vec<char> = lit
83                     .chars()
84                     .filter(|&x| {
85                         unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
86                             && !x.is_whitespace()
87                     })
88                     .collect();
89
90                 if let [ch] = printable.as_slice() {
91                     has_help = true;
92
93                     handler.span_note(
94                         span,
95                         &format!(
96                             "there are non-printing characters, the full sequence is `{}`",
97                             lit.escape_default(),
98                         ),
99                     );
100
101                     handler.span_suggestion(
102                         span,
103                         "consider removing the non-printing characters",
104                         ch,
105                         Applicability::MaybeIncorrect,
106                     );
107                 }
108             }
109
110             if !has_help {
111                 let (prefix, msg) = if mode.is_byte() {
112                     ("b", "if you meant to write a byte string literal, use double quotes")
113                 } else {
114                     ("", "if you meant to write a `str` literal, use double quotes")
115                 };
116                 let mut escaped = String::with_capacity(lit.len());
117                 let mut chrs = lit.chars().peekable();
118                 while let Some(first) = chrs.next() {
119                     match (first, chrs.peek()) {
120                         ('\\', Some('"')) => {
121                             escaped.push('\\');
122                             escaped.push('"');
123                             chrs.next();
124                         }
125                         ('"', _) => {
126                             escaped.push('\\');
127                             escaped.push('"')
128                         }
129                         (c, _) => escaped.push(c),
130                     };
131                 }
132                 handler.span_suggestion(
133                     span_with_quotes,
134                     msg,
135                     format!("{prefix}\"{escaped}\""),
136                     Applicability::MachineApplicable,
137                 );
138             }
139
140             handler.emit();
141         }
142         EscapeError::EscapeOnlyChar => {
143             let (c, char_span) = last_char();
144
145             let msg = if mode.is_byte() {
146                 "byte constant must be escaped"
147             } else {
148                 "character constant must be escaped"
149             };
150             handler
151                 .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
152                 .span_suggestion(
153                     char_span,
154                     "escape the character",
155                     c.escape_default(),
156                     Applicability::MachineApplicable,
157                 )
158                 .emit();
159         }
160         EscapeError::BareCarriageReturn => {
161             let msg = if mode.in_double_quotes() {
162                 "bare CR not allowed in string, use `\\r` instead"
163             } else {
164                 "character constant must be escaped: `\\r`"
165             };
166             handler
167                 .struct_span_err(span, msg)
168                 .span_suggestion(
169                     span,
170                     "escape the character",
171                     "\\r",
172                     Applicability::MachineApplicable,
173                 )
174                 .emit();
175         }
176         EscapeError::BareCarriageReturnInRawString => {
177             assert!(mode.in_double_quotes());
178             let msg = "bare CR not allowed in raw string";
179             handler.span_err(span, msg);
180         }
181         EscapeError::InvalidEscape => {
182             let (c, span) = last_char();
183
184             let label =
185                 if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
186             let ec = escaped_char(c);
187             let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
188             diag.span_label(span, label);
189             if c == '{' || c == '}' && !mode.is_byte() {
190                 diag.help(
191                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
192                 );
193             } else if c == '\r' {
194                 diag.help(
195                     "this is an isolated carriage return; consider checking your editor and \
196                      version control settings",
197                 );
198             } else {
199                 if !mode.is_byte() {
200                     diag.span_suggestion(
201                         span_with_quotes,
202                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
203                         format!("r\"{}\"", lit),
204                         Applicability::MaybeIncorrect,
205                     );
206                 }
207
208                 diag.help(
209                     "for more information, visit \
210                      <https://static.rust-lang.org/doc/master/reference.html#literals>",
211                 );
212             }
213             diag.emit();
214         }
215         EscapeError::TooShortHexEscape => {
216             handler.span_err(span, "numeric character escape is too short");
217         }
218         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
219             let (c, span) = last_char();
220
221             let msg = if error == EscapeError::InvalidCharInHexEscape {
222                 "invalid character in numeric character escape"
223             } else {
224                 "invalid character in unicode escape"
225             };
226             let c = escaped_char(c);
227
228             handler
229                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
230                 .span_label(span, msg)
231                 .emit();
232         }
233         EscapeError::NonAsciiCharInByte => {
234             let (c, span) = last_char();
235             let desc = match mode {
236                 Mode::Byte => "byte literal",
237                 Mode::ByteStr => "byte string literal",
238                 Mode::RawByteStr => "raw byte string literal",
239                 _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
240             };
241             let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
242             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
243                 format!(" but is {:?}", c)
244             } else {
245                 String::new()
246             };
247             err.span_label(span, &format!("must be ASCII{}", postfix));
248             // Note: the \\xHH suggestions are not given for raw byte string
249             // literals, because they are araw and so cannot use any escapes.
250             if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
251                 err.span_suggestion(
252                     span,
253                     &format!(
254                         "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
255                         c
256                     ),
257                     format!("\\x{:X}", c as u32),
258                     Applicability::MaybeIncorrect,
259                 );
260             } else if mode == Mode::Byte {
261                 err.span_label(span, "this multibyte character does not fit into a single byte");
262             } else if mode != Mode::RawByteStr {
263                 let mut utf8 = String::new();
264                 utf8.push(c);
265                 err.span_suggestion(
266                     span,
267                     &format!(
268                         "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
269                         c
270                     ),
271                     utf8.as_bytes()
272                         .iter()
273                         .map(|b: &u8| format!("\\x{:X}", *b))
274                         .fold("".to_string(), |a, c| a + &c),
275                     Applicability::MaybeIncorrect,
276                 );
277             }
278             err.emit();
279         }
280         EscapeError::OutOfRangeHexEscape => {
281             handler
282                 .struct_span_err(span, "out of range hex escape")
283                 .span_label(span, "must be a character in the range [\\x00-\\x7f]")
284                 .emit();
285         }
286         EscapeError::LeadingUnderscoreUnicodeEscape => {
287             let (c, span) = last_char();
288             let msg = "invalid start of unicode escape";
289             handler
290                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
291                 .span_label(span, msg)
292                 .emit();
293         }
294         EscapeError::OverlongUnicodeEscape => {
295             handler
296                 .struct_span_err(span, "overlong unicode escape")
297                 .span_label(span, "must have at most 6 hex digits")
298                 .emit();
299         }
300         EscapeError::UnclosedUnicodeEscape => {
301             handler
302                 .struct_span_err(span, "unterminated unicode escape")
303                 .span_label(span, "missing a closing `}`")
304                 .span_suggestion_verbose(
305                     span.shrink_to_hi(),
306                     "terminate the unicode escape",
307                     "}",
308                     Applicability::MaybeIncorrect,
309                 )
310                 .emit();
311         }
312         EscapeError::NoBraceInUnicodeEscape => {
313             let msg = "incorrect unicode escape sequence";
314             let mut diag = handler.struct_span_err(span, msg);
315
316             let mut suggestion = "\\u{".to_owned();
317             let mut suggestion_len = 0;
318             let (c, char_span) = last_char();
319             let chars = once(c).chain(lit[range.end..].chars());
320             for c in chars.take(6).take_while(|c| c.is_digit(16)) {
321                 suggestion.push(c);
322                 suggestion_len += c.len_utf8();
323             }
324
325             if suggestion_len > 0 {
326                 suggestion.push('}');
327                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
328                 diag.span_suggestion(
329                     span.with_hi(hi),
330                     "format of unicode escape sequences uses braces",
331                     suggestion,
332                     Applicability::MaybeIncorrect,
333                 );
334             } else {
335                 diag.span_label(span, msg);
336                 diag.help("format of unicode escape sequences is `\\u{...}`");
337             }
338
339             diag.emit();
340         }
341         EscapeError::UnicodeEscapeInByte => {
342             let msg = "unicode escape in byte string";
343             handler
344                 .struct_span_err(span, msg)
345                 .span_label(span, msg)
346                 .help("unicode escape sequences cannot be used as a byte or in a byte string")
347                 .emit();
348         }
349         EscapeError::EmptyUnicodeEscape => {
350             handler
351                 .struct_span_err(span, "empty unicode escape")
352                 .span_label(span, "this escape must have at least 1 hex digit")
353                 .emit();
354         }
355         EscapeError::ZeroChars => {
356             let msg = "empty character literal";
357             handler.struct_span_err(span, msg).span_label(span, msg).emit();
358         }
359         EscapeError::LoneSlash => {
360             let msg = "invalid trailing slash in literal";
361             handler.struct_span_err(span, msg).span_label(span, msg).emit();
362         }
363         EscapeError::UnskippedWhitespaceWarning => {
364             let (c, char_span) = last_char();
365             let msg =
366                 format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
367             handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
368         }
369         EscapeError::MultipleSkippedLinesWarning => {
370             let msg = "multiple lines skipped by escaped newline";
371             let bottom_msg = "skipping everything up to and including this point";
372             handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
373         }
374     }
375 }
376
/// Renders `c` as a string suitable for embedding in an error message.
///
/// Printable ASCII (space through `~`) is returned verbatim, including `\`, `'`
/// and `"`, which are deliberately left unescaped in user-facing text. Every
/// other character is rendered through `char::escape_default`.
pub(crate) fn escaped_char(c: char) -> String {
    let printable_ascii = '\u{20}'..='\u{7e}';
    if printable_ascii.contains(&c) {
        String::from(c)
    } else {
        c.escape_default().collect()
    }
}
386 }