git.lizzy.rs Git - rust.git/blobdiff - compiler/rustc_parse/src/lexer/mod.rs
Auto merge of #107840 - matthiaskrgr:rollup-e6v7x0a, r=matthiaskrgr
[rust.git] / compiler / rustc_parse / src / lexer / mod.rs
index e957224a03377805bbfb7fa666d60df3c29cb607..bd998ed91d977d8f68a89e54f091e9b95b72e535 100644 (file)
@@ -1,11 +1,10 @@
+use crate::errors;
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{
-    error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
-};
+use rustc_errors::{error_code, Applicability, DiagnosticBuilder, PResult, StashKey};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
@@ -151,7 +150,7 @@ fn next_token(&mut self) -> (Token, bool) {
                     let span = self.mk_sp(start, self.pos);
                     self.sess.symbol_gallery.insert(sym, span);
                     if !sym.can_be_raw() {
-                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+                        self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
                     }
                     self.sess.raw_identifier_spans.borrow_mut().push(span);
                     token::Ident(sym, true)
@@ -262,27 +261,24 @@ fn next_token(&mut self) -> (Token, bool) {
                         self.nbsp_is_whitespace = true;
                     }
                     let repeats = it.take_while(|c1| *c1 == c).count();
-                    let mut err =
-                        self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
                     // FIXME: the lexer could be used to turn the ASCII version of unicode
                     // homoglyphs, instead of keeping a table in `check_for_substitution` into the
                     // token. Ideally, this should be inside `rustc_lexer`. However, we should
                     // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                     // fancier error recovery to it, as there will be less overall work to do this
                     // way.
-                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
-                    if c == '\x00' {
-                        err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
-                    }
-                    if repeats > 0 {
-                        if repeats == 1 {
-                            err.note(format!("character appears once more"));
-                        } else {
-                            err.note(format!("character appears {repeats} more times"));
-                        }
-                        swallow_next_invalid = repeats;
-                    }
-                    err.emit();
+                    let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+                    self.sess.emit_err(errors::UnknownTokenStart {
+                        span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
+                        escaped: escaped_char(c),
+                        sugg,
+                        null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+                        repeat: if repeats > 0 {
+                            swallow_next_invalid = repeats;
+                            Some(errors::UnknownTokenRepeat { repeats })
+                        } else {None}
+                    });
+
                     if let Some(token) = token {
                         token
                     } else {
@@ -297,26 +293,6 @@ fn next_token(&mut self) -> (Token, bool) {
         }
     }
 
-    /// Report a fatal lexical error with a given span.
-    fn fatal_span(&self, sp: Span, m: &str) -> ! {
-        self.sess.span_diagnostic.span_fatal(sp, m)
-    }
-
-    /// Report a lexical error with a given span.
-    fn err_span(&self, sp: Span, m: &str) {
-        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-    }
-
-    /// Report a fatal error spanning [`from_pos`, `to_pos`).
-    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
-        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
-    /// Report a lexical error spanning [`from_pos`, `to_pos`).
-    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-        self.err_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
     fn struct_fatal_span_char(
         &self,
         from_pos: BytePos,
@@ -329,18 +305,6 @@ fn struct_fatal_span_char(
             .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
     }
 
-    fn struct_err_span_char(
-        &self,
-        from_pos: BytePos,
-        to_pos: BytePos,
-        m: &str,
-        c: char,
-    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
-        self.sess
-            .span_diagnostic
-            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
-    }
-
     /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
     /// complain about it.
     fn lint_unicode_text_flow(&self, start: BytePos) {
@@ -368,14 +332,12 @@ fn cook_doc_comment(
     ) -> TokenKind {
         if content.contains('\r') {
             for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
-                self.err_span_(
+                let span = self.mk_sp(
                     content_start + BytePos(idx as u32),
                     content_start + BytePos(idx as u32 + 1),
-                    match comment_kind {
-                        CommentKind::Line => "bare CR not allowed in doc-comment",
-                        CommentKind::Block => "bare CR not allowed in block doc-comment",
-                    },
                 );
+                let block = matches!(comment_kind, CommentKind::Block);
+                self.sess.emit_err(errors::CrDocComment { span, block });
             }
         }
 
@@ -454,26 +416,20 @@ fn cook_lexer_literal(
             }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 if empty_int {
-                    self.sess
-                        .span_diagnostic
-                        .struct_span_err_with_code(
-                            self.mk_sp(start, end),
-                            "no valid digits found for number",
-                            error_code!(E0768),
-                        )
-                        .emit();
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::NoDigitsLiteral { span });
                     (token::Integer, sym::integer(0))
                 } else {
                     if matches!(base, Base::Binary | Base::Octal) {
                         let base = base as u32;
                         let s = self.str_from_to(start + BytePos(2), end);
                         for (idx, c) in s.char_indices() {
+                            let span = self.mk_sp(
+                                start + BytePos::from_usize(2 + idx),
+                                start + BytePos::from_usize(2 + idx + c.len_utf8()),
+                            );
                             if c != '_' && c.to_digit(base).is_none() {
-                                self.err_span_(
-                                    start + BytePos::from_usize(2 + idx),
-                                    start + BytePos::from_usize(2 + idx + c.len_utf8()),
-                                    &format!("invalid digit for a base {} literal", base),
-                                );
+                                self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
                             }
                         }
                     }
@@ -482,19 +438,18 @@ fn cook_lexer_literal(
             }
             rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
                 if empty_exponent {
-                    self.err_span_(start, self.pos, "expected at least one digit in exponent");
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.emit_err(errors::EmptyExponentFloat { span });
                 }
-                match base {
-                    Base::Hexadecimal => {
-                        self.err_span_(start, end, "hexadecimal float literal is not supported")
-                    }
-                    Base::Octal => {
-                        self.err_span_(start, end, "octal float literal is not supported")
-                    }
-                    Base::Binary => {
-                        self.err_span_(start, end, "binary float literal is not supported")
-                    }
-                    _ => {}
+                let base = match base {
+                    Base::Hexadecimal => Some("hexadecimal"),
+                    Base::Octal => Some("octal"),
+                    Base::Binary => Some("binary"),
+                    _ => None,
+                };
+                if let Some(base) = base {
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
                 }
                 (token::Float, self.symbol_from_to(start, end))
             }
@@ -644,54 +599,34 @@ fn report_unterminated_block_comment(&self, start: BytePos, doc_style: Option<Do
     // identifier tokens.
     fn report_unknown_prefix(&self, start: BytePos) {
         let prefix_span = self.mk_sp(start, self.pos);
-        let prefix_str = self.str_from_to(start, self.pos);
-        let msg = format!("prefix `{}` is unknown", prefix_str);
+        let prefix = self.str_from_to(start, self.pos);
 
         let expn_data = prefix_span.ctxt().outer_expn_data();
 
         if expn_data.edition >= Edition::Edition2021 {
             // In Rust 2021, this is a hard error.
-            let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
-            err.span_label(prefix_span, "unknown prefix");
-            if prefix_str == "rb" {
-                err.span_suggestion_verbose(
-                    prefix_span,
-                    "use `br` for a raw byte string",
-                    "br",
-                    Applicability::MaybeIncorrect,
-                );
+            let sugg = if prefix == "rb" {
+                Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
             } else if expn_data.is_root() {
-                err.span_suggestion_verbose(
-                    prefix_span.shrink_to_hi(),
-                    "consider inserting whitespace here",
-                    " ",
-                    Applicability::MaybeIncorrect,
-                );
-            }
-            err.note("prefixed identifiers and literals are reserved since Rust 2021");
-            err.emit();
+                Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+            } else {
+                None
+            };
+            self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
         } else {
             // Before Rust 2021, only emit a lint for migration.
             self.sess.buffer_lint_with_diagnostic(
                 &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
                 prefix_span,
                 ast::CRATE_NODE_ID,
-                &msg,
+                &format!("prefix `{prefix}` is unknown"),
                 BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
             );
         }
     }
 
-    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
-        self.fatal_span_(
-            start,
-            self.pos,
-            &format!(
-                "too many `#` symbols: raw strings may be delimited \
-                by up to 255 `#` symbols, but found {}",
-                found
-            ),
-        )
+    fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
+        self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
 
     fn cook_quoted(