use std::iter::once;
use std::ops::Range;
-use rustc_errors::{Applicability, Handler};
+use rustc_errors::{pluralize, Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};
.emit();
}
EscapeError::MoreThanOneChar => {
- let (prefix, msg) = if mode.is_bytes() {
- ("b", "if you meant to write a byte string literal, use double quotes")
- } else {
- ("", "if you meant to write a `str` literal, use double quotes")
- };
+ // A character literal holding more than one codepoint. Report the error and
+ // attach exactly one suggestion: the NFC-normalized single character when
+ // one exists, otherwise a rewrite as a (byte) string literal.
+ use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
- handler
- .struct_span_err(
- span_with_quotes,
- "character literal may only contain one codepoint",
- )
- .span_suggestion(
+ // Set once the normalization suggestion has been attached, so that the
+ // string-literal suggestion below is only offered as a fallback.
+ let mut has_help = false;
+ // Bound as `diag` instead of shadowing `handler`: this is the error being
+ // built, not the handler that created it.
+ let mut diag = handler.struct_span_err(
+     span_with_quotes,
+     "character literal may only contain one codepoint",
+ );
+
+ // Every codepoint after the first one is a combining mark.
+ if lit.chars().skip(1).all(is_combining_mark) {
+     // The marks are shown escaped (`\u{..}`) in the note.
+     let escaped_marks =
+         lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
+     diag.span_note(
+         span,
+         &format!(
+             "this `{}` is followed by the combining mark{} `{}`",
+             lit.chars().next().unwrap(),
+             pluralize!(escaped_marks.len()),
+             escaped_marks.join(""),
+         ),
+     );
+     // If NFC composes base + marks into a single codepoint, that codepoint
+     // is a drop-in replacement for the literal's contents.
+     let normalized = lit.nfc().to_string();
+     if normalized.chars().count() == 1 {
+         has_help = true;
+         diag.span_suggestion(
+             span,
+             &format!(
+                 "consider using the normalized form `{}` of this character",
+                 normalized.chars().next().unwrap().escape_default()
+             ),
+             normalized,
+             Applicability::MachineApplicable,
+         );
+     }
+ }
+
+ if !has_help {
+     let (prefix, msg) = if mode.is_bytes() {
+         ("b", "if you meant to write a byte string literal, use double quotes")
+     } else {
+         ("", "if you meant to write a `str` literal, use double quotes")
+     };
+
+     // `lit` is pasted between double quotes below. A bare `"` is fine inside
+     // `'…'` but would terminate the suggested `"…"` early, so escape it.
+     // A backslash and the character after it are copied through as a pair,
+     // which leaves existing escapes such as `\"` and `\\` untouched.
+     let mut escaped = String::with_capacity(lit.len());
+     let mut chars = lit.chars();
+     while let Some(c) = chars.next() {
+         match c {
+             '\\' => {
+                 escaped.push(c);
+                 if let Some(next) = chars.next() {
+                     escaped.push(next);
+                 }
+             }
+             '"' => escaped.push_str("\\\""),
+             _ => escaped.push(c),
+         }
+     }
+
+     diag.span_suggestion(
span_with_quotes,
msg,
- format!("{}\"{}\"", prefix, lit),
+         format!("{}\"{}\"", prefix, escaped),
Applicability::MachineApplicable,
- )
- .emit();
+     );
+ }
+
+ diag.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, char_span) = last_char();
--- /dev/null
+// Regression test for #88684: Improve diagnostics for combining marks
+// in character literals.
+
+// run-rustfix
+
+fn main() {
+ let _spade = "♠️";
+ //~^ ERROR: character literal may only contain one codepoint
+ //~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
+ //~| HELP: if you meant to write a `str` literal, use double quotes
+
+ let _s = "ṩ̂̊";
+ //~^ ERROR: character literal may only contain one codepoint
+ //~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
+ //~| HELP: if you meant to write a `str` literal, use double quotes
+
+ let _a = 'Å';
+ //~^ ERROR: character literal may only contain one codepoint
+ //~| NOTE: this `A` is followed by the combining mark `\u{30a}`
+ //~| HELP: consider using the normalized form `\u{c5}` of this character
+}
--- /dev/null
+// Regression test for #88684: Improve diagnostics for combining marks
+// in character literals.
+
+// run-rustfix
+
+fn main() {
+ let _spade = '♠️';
+ //~^ ERROR: character literal may only contain one codepoint
+ //~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
+ //~| HELP: if you meant to write a `str` literal, use double quotes
+
+ let _s = 'ṩ̂̊';
+ //~^ ERROR: character literal may only contain one codepoint
+ //~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
+ //~| HELP: if you meant to write a `str` literal, use double quotes
+
+ let _a = 'Å';
+ //~^ ERROR: character literal may only contain one codepoint
+ //~| NOTE: this `A` is followed by the combining mark `\u{30a}`
+ //~| HELP: consider using the normalized form `\u{c5}` of this character
+}
--- /dev/null
+error: character literal may only contain one codepoint
+ --> $DIR/unicode-character-literal.rs:7:18
+ |
+LL | let _spade = '♠️';
+ | ^^^
+ |
+note: this `♠` is followed by the combining mark `\u{fe0f}`
+ --> $DIR/unicode-character-literal.rs:7:19
+ |
+LL | let _spade = '♠️';
+ | ^
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let _spade = "♠️";
+ | ~~~
+
+error: character literal may only contain one codepoint
+ --> $DIR/unicode-character-literal.rs:12:14
+ |
+LL | let _s = 'ṩ̂̊';
+ | ^^^
+ |
+note: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
+ --> $DIR/unicode-character-literal.rs:12:15
+ |
+LL | let _s = 'ṩ̂̊';
+ | ^
+help: if you meant to write a `str` literal, use double quotes
+ |
+LL | let _s = "ṩ̂̊";
+ | ~~~
+
+error: character literal may only contain one codepoint
+ --> $DIR/unicode-character-literal.rs:17:14
+ |
+LL | let _a = 'Å';
+ | ^-^
+ | |
+ | help: consider using the normalized form `\u{c5}` of this character: `Å`
+ |
+note: this `A` is followed by the combining mark `\u{30a}`
+ --> $DIR/unicode-character-literal.rs:17:15
+ |
+LL | let _a = 'Å';
+ | ^
+
+error: aborting due to 3 previous errors
+