git.lizzy.rs Git - rust.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Properly escape quotes when suggesting switching between char/string literals
[rust.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{pluralize, Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 pub(crate) fn emit_unescape_error(
11     handler: &Handler,
12     // interior part of the literal, without quotes
13     lit: &str,
14     // full span of the literal, including quotes
15     span_with_quotes: Span,
16     // interior span of the literal, without quotes
17     span: Span,
18     mode: Mode,
19     // range of the error inside `lit`
20     range: Range<usize>,
21     error: EscapeError,
22 ) {
23     debug!(
24         "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
25         lit, span_with_quotes, mode, range, error
26     );
27     let last_char = || {
28         let c = lit[range.clone()].chars().rev().next().unwrap();
29         let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
30         (c, span)
31     };
32     match error {
33         EscapeError::LoneSurrogateUnicodeEscape => {
34             handler
35                 .struct_span_err(span, "invalid unicode character escape")
36                 .span_label(span, "invalid escape")
37                 .help("unicode escape must not be a surrogate")
38                 .emit();
39         }
40         EscapeError::OutOfRangeUnicodeEscape => {
41             handler
42                 .struct_span_err(span, "invalid unicode character escape")
43                 .span_label(span, "invalid escape")
44                 .help("unicode escape must be at most 10FFFF")
45                 .emit();
46         }
47         EscapeError::MoreThanOneChar => {
48             use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
49
50             let mut has_help = false;
51             let mut handler = handler.struct_span_err(
52                 span_with_quotes,
53                 "character literal may only contain one codepoint",
54             );
55
56             if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
57                 let escaped_marks =
58                     lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
59                 handler.span_note(
60                     span,
61                     &format!(
62                         "this `{}` is followed by the combining mark{} `{}`",
63                         lit.chars().next().unwrap(),
64                         pluralize!(escaped_marks.len()),
65                         escaped_marks.join(""),
66                     ),
67                 );
68                 let normalized = lit.nfc().to_string();
69                 if normalized.chars().count() == 1 {
70                     has_help = true;
71                     handler.span_suggestion(
72                         span,
73                         &format!(
74                             "consider using the normalized form `{}` of this character",
75                             normalized.chars().next().unwrap().escape_default()
76                         ),
77                         normalized,
78                         Applicability::MachineApplicable,
79                     );
80                 }
81             } else {
82                 let printable: Vec<char> = lit
83                     .chars()
84                     .filter(|&x| {
85                         unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
86                             && !x.is_whitespace()
87                     })
88                     .collect();
89
90                 if let [ch] = printable.as_slice() {
91                     has_help = true;
92
93                     handler.span_note(
94                         span,
95                         &format!(
96                             "there are non-printing characters, the full sequence is `{}`",
97                             lit.escape_default(),
98                         ),
99                     );
100
101                     handler.span_suggestion(
102                         span,
103                         "consider removing the non-printing characters",
104                         ch,
105                         Applicability::MaybeIncorrect,
106                     );
107                 }
108             }
109
110             if !has_help {
111                 let (prefix, msg) = if mode.is_bytes() {
112                     ("b", "if you meant to write a byte string literal, use double quotes")
113                 } else {
114                     ("", "if you meant to write a `str` literal, use double quotes")
115                 };
116                 let mut escaped = String::with_capacity(lit.len());
117                 let mut chrs = lit.chars().peekable();
118                 while let Some(first) = chrs.next() {
119                     match (first, chrs.peek()) {
120                         ('\\', Some('"')) => {
121                             escaped.push('\\');
122                             escaped.push('"');
123                             chrs.next();
124                         }
125                         ('"', _) => {
126                             escaped.push('\\');
127                             escaped.push('"')
128                         }
129                         (c, _) => escaped.push(c),
130                     };
131                 }
132                 handler.span_suggestion(
133                     span_with_quotes,
134                     msg,
135                     format!("{prefix}\"{escaped}\""),
136                     Applicability::MachineApplicable,
137                 );
138             }
139
140             handler.emit();
141         }
142         EscapeError::EscapeOnlyChar => {
143             let (c, char_span) = last_char();
144
145             let msg = if mode.is_bytes() {
146                 "byte constant must be escaped"
147             } else {
148                 "character constant must be escaped"
149             };
150             handler
151                 .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
152                 .span_suggestion(
153                     char_span,
154                     "escape the character",
155                     c.escape_default(),
156                     Applicability::MachineApplicable,
157                 )
158                 .emit();
159         }
160         EscapeError::BareCarriageReturn => {
161             let msg = if mode.in_double_quotes() {
162                 "bare CR not allowed in string, use `\\r` instead"
163             } else {
164                 "character constant must be escaped: `\\r`"
165             };
166             handler
167                 .struct_span_err(span, msg)
168                 .span_suggestion(
169                     span,
170                     "escape the character",
171                     "\\r",
172                     Applicability::MachineApplicable,
173                 )
174                 .emit();
175         }
176         EscapeError::BareCarriageReturnInRawString => {
177             assert!(mode.in_double_quotes());
178             let msg = "bare CR not allowed in raw string";
179             handler.span_err(span, msg);
180         }
181         EscapeError::InvalidEscape => {
182             let (c, span) = last_char();
183
184             let label =
185                 if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
186             let ec = escaped_char(c);
187             let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
188             diag.span_label(span, label);
189             if c == '{' || c == '}' && !mode.is_bytes() {
190                 diag.help(
191                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
192                 );
193             } else if c == '\r' {
194                 diag.help(
195                     "this is an isolated carriage return; consider checking your editor and \
196                      version control settings",
197                 );
198             } else {
199                 if !mode.is_bytes() {
200                     diag.span_suggestion(
201                         span_with_quotes,
202                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
203                         format!("r\"{}\"", lit),
204                         Applicability::MaybeIncorrect,
205                     );
206                 }
207
208                 diag.help(
209                     "for more information, visit \
210                      <https://static.rust-lang.org/doc/master/reference.html#literals>",
211                 );
212             }
213             diag.emit();
214         }
215         EscapeError::TooShortHexEscape => {
216             handler.span_err(span, "numeric character escape is too short");
217         }
218         EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
219             let (c, span) = last_char();
220
221             let msg = if error == EscapeError::InvalidCharInHexEscape {
222                 "invalid character in numeric character escape"
223             } else {
224                 "invalid character in unicode escape"
225             };
226             let c = escaped_char(c);
227
228             handler
229                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
230                 .span_label(span, msg)
231                 .emit();
232         }
233         EscapeError::NonAsciiCharInByte => {
234             assert!(mode.is_bytes());
235             let (c, span) = last_char();
236             let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
237             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
238                 format!(" but is {:?}", c)
239             } else {
240                 String::new()
241             };
242             err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
243             if (c as u32) <= 0xFF {
244                 err.span_suggestion(
245                     span,
246                     &format!(
247                         "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
248                         c
249                     ),
250                     format!("\\x{:X}", c as u32),
251                     Applicability::MaybeIncorrect,
252                 );
253             } else if matches!(mode, Mode::Byte) {
254                 err.span_label(span, "this multibyte character does not fit into a single byte");
255             } else if matches!(mode, Mode::ByteStr) {
256                 let mut utf8 = String::new();
257                 utf8.push(c);
258                 err.span_suggestion(
259                     span,
260                     &format!(
261                         "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
262                         c
263                     ),
264                     utf8.as_bytes()
265                         .iter()
266                         .map(|b: &u8| format!("\\x{:X}", *b))
267                         .fold("".to_string(), |a, c| a + &c),
268                     Applicability::MaybeIncorrect,
269                 );
270             }
271             err.emit();
272         }
273         EscapeError::NonAsciiCharInByteString => {
274             assert!(mode.is_bytes());
275             let (c, span) = last_char();
276             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
277                 format!(" but is {:?}", c)
278             } else {
279                 String::new()
280             };
281             handler
282                 .struct_span_err(span, "raw byte string must be ASCII")
283                 .span_label(span, &format!("must be ASCII{}", postfix))
284                 .emit();
285         }
286         EscapeError::OutOfRangeHexEscape => {
287             handler
288                 .struct_span_err(span, "out of range hex escape")
289                 .span_label(span, "must be a character in the range [\\x00-\\x7f]")
290                 .emit();
291         }
292         EscapeError::LeadingUnderscoreUnicodeEscape => {
293             let (c, span) = last_char();
294             let msg = "invalid start of unicode escape";
295             handler
296                 .struct_span_err(span, &format!("{}: `{}`", msg, c))
297                 .span_label(span, msg)
298                 .emit();
299         }
300         EscapeError::OverlongUnicodeEscape => {
301             handler
302                 .struct_span_err(span, "overlong unicode escape")
303                 .span_label(span, "must have at most 6 hex digits")
304                 .emit();
305         }
306         EscapeError::UnclosedUnicodeEscape => {
307             handler
308                 .struct_span_err(span, "unterminated unicode escape")
309                 .span_label(span, "missing a closing `}`")
310                 .span_suggestion_verbose(
311                     span.shrink_to_hi(),
312                     "terminate the unicode escape",
313                     "}",
314                     Applicability::MaybeIncorrect,
315                 )
316                 .emit();
317         }
318         EscapeError::NoBraceInUnicodeEscape => {
319             let msg = "incorrect unicode escape sequence";
320             let mut diag = handler.struct_span_err(span, msg);
321
322             let mut suggestion = "\\u{".to_owned();
323             let mut suggestion_len = 0;
324             let (c, char_span) = last_char();
325             let chars = once(c).chain(lit[range.end..].chars());
326             for c in chars.take(6).take_while(|c| c.is_digit(16)) {
327                 suggestion.push(c);
328                 suggestion_len += c.len_utf8();
329             }
330
331             if suggestion_len > 0 {
332                 suggestion.push('}');
333                 let hi = char_span.lo() + BytePos(suggestion_len as u32);
334                 diag.span_suggestion(
335                     span.with_hi(hi),
336                     "format of unicode escape sequences uses braces",
337                     suggestion,
338                     Applicability::MaybeIncorrect,
339                 );
340             } else {
341                 diag.span_label(span, msg);
342                 diag.help("format of unicode escape sequences is `\\u{...}`");
343             }
344
345             diag.emit();
346         }
347         EscapeError::UnicodeEscapeInByte => {
348             let msg = "unicode escape in byte string";
349             handler
350                 .struct_span_err(span, msg)
351                 .span_label(span, msg)
352                 .help("unicode escape sequences cannot be used as a byte or in a byte string")
353                 .emit();
354         }
355         EscapeError::EmptyUnicodeEscape => {
356             handler
357                 .struct_span_err(span, "empty unicode escape")
358                 .span_label(span, "this escape must have at least 1 hex digit")
359                 .emit();
360         }
361         EscapeError::ZeroChars => {
362             let msg = "empty character literal";
363             handler.struct_span_err(span, msg).span_label(span, msg).emit();
364         }
365         EscapeError::LoneSlash => {
366             let msg = "invalid trailing slash in literal";
367             handler.struct_span_err(span, msg).span_label(span, msg).emit();
368         }
369         EscapeError::UnskippedWhitespaceWarning => {
370             let (c, char_span) = last_char();
371             let msg =
372                 format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
373             handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
374         }
375         EscapeError::MultipleSkippedLinesWarning => {
376             let msg = "multiple lines skipped by escaped newline";
377             let bottom_msg = "skipping everything up to and including this point";
378             handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
379         }
380     }
381 }
382
/// Renders `c` as a string suitable for embedding in a diagnostic message.
///
/// Printable ASCII (`' '` through `'~'`) is returned verbatim; every other character is
/// rendered through [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    // Printable ASCII is shown as-is: `\`, `'` and `"` are deliberately not escaped in
    // user-facing messages.
    if (' '..='~').contains(&c) {
        return c.to_string();
    }
    c.escape_default().collect()
}
392 }