4 pub use Nonterminal::*;
9 use crate::tokenstream::TokenTree;
11 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
12 use rustc_data_structures::sync::Lrc;
13 use rustc_macros::HashStable_Generic;
14 use rustc_span::symbol::{kw, sym};
15 use rustc_span::symbol::{Ident, Symbol};
16 use rustc_span::{self, Span, DUMMY_SP};
20 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
21 pub enum CommentKind {
26 #[derive(Clone, PartialEq, Encodable, Decodable, Hash, Debug, Copy)]
27 #[derive(HashStable_Generic)]
41 /// A delimiter token.
42 #[derive(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Debug, Copy)]
43 #[derive(HashStable_Generic)]
45 /// A round parenthesis (i.e., `(` or `)`).
47 /// A square bracket (i.e., `[` or `]`).
49 /// A curly brace (i.e., `{` or `}`).
51 /// An empty delimiter.
/// Returns 0 for `NoDelim` and 1 for every other delimiter.
56 pub fn len(self) -> usize {
57 if self == NoDelim { 0 } else { 1 }
60 pub fn is_empty(self) -> bool {
65 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
67 Bool, // AST only, must never appear in a `Token`
73 StrRaw(u16), // raw string delimited by `n` hash symbols
75 ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols
80 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
84 pub suffix: Option<Symbol>,
// Renders a literal token as text: the symbol wrapped in the quoting that matches its
// kind, followed by the suffix when one is present.
87 impl fmt::Display for Lit {
88 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
89 let Lit { kind, symbol, suffix } = *self;
91 Byte => write!(f, "b'{}'", symbol)?,
92 Char => write!(f, "'{}'", symbol)?,
93 Str => write!(f, "\"{}\"", symbol)?,
// Raw strings repeat `#` `n` times on both sides of the quoted text.
96 "r{delim}\"{string}\"{delim}",
97 delim = "#".repeat(n as usize),
100 ByteStr => write!(f, "b\"{}\"", symbol)?,
101 ByteStrRaw(n) => write!(
// Same layout as raw strings, with a `br` prefix.
103 "br{delim}\"{string}\"{delim}",
104 delim = "#".repeat(n as usize),
// These kinds are printed as the bare symbol, without any quoting.
107 Integer | Float | Bool | Err => write!(f, "{}", symbol)?,
// The suffix, if any, is appended directly after the literal text.
110 if let Some(suffix) = suffix {
111 write!(f, "{}", suffix)?;
119 /// An English article for the literal token kind.
120 pub fn article(self) -> &'static str {
122 Integer | Err => "an",
/// Returns a static, human-readable name for the literal kind
/// (for example "integer", "string" or "byte string").
///
/// # Panics
///
/// Panics when called on `Bool`.
127 pub fn descr(self) -> &'static str {
// `Bool` is AST-only and must never appear in a `Token`, so reaching it here is a bug.
129 Bool => panic!("literal token contains `Lit::Bool`"),
132 Integer => "integer",
// Raw and non-raw variants share one description.
134 Str | StrRaw(..) => "string",
135 ByteStr | ByteStrRaw(..) => "byte string",
/// Returns `true` for the literal kinds that may carry a suffix:
/// `Integer`, `Float` and `Err`.
/// NOTE(review): the arm for the remaining kinds is not visible here; presumably it
/// yields `false` — confirm.
140 crate fn may_have_suffix(self) -> bool {
142 Integer | Float | Err => true,
/// Creates a `Lit` from its kind, its symbol and an optional suffix.
149 pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
150 Lit { kind, symbol, suffix }
/// Decides whether an identifier with the given name, span and rawness can start an
/// expression.
///
/// Identifiers that are not reserved, and path-segment keywords, are accepted.
/// NOTE(review): further alternatives appear to follow the conditions visible here —
/// confirm against the complete function body.
154 pub fn ident_can_begin_expr(name: Symbol, span: Span, is_raw: bool) -> bool {
// Wrap the identifier in a token so the `Token` keyword predicates can be reused.
155 let ident_token = Token::new(Ident(name, is_raw), span);
157 !ident_token.is_reserved_ident()
158 || ident_token.is_path_segment_keyword()
/// Decides whether an identifier with the given name, span and rawness can start a type.
///
/// Identifiers that are not reserved, and path-segment keywords, are accepted; a fixed
/// list of keywords is consulted as a third alternative.
182 fn ident_can_begin_type(name: Symbol, span: Span, is_raw: bool) -> bool {
// Wrap the identifier in a token so the `Token` keyword predicates can be reused.
183 let ident_token = Token::new(Ident(name, is_raw), span);
185 !ident_token.is_reserved_ident()
186 || ident_token.is_path_segment_keyword()
// NOTE(review): how this list is tested is not visible here; presumably it is checked
// for containing `name` — confirm.
187 || [kw::Underscore, kw::For, kw::Impl, kw::Fn, kw::Unsafe, kw::Extern, kw::Typeof, kw::Dyn]
191 #[derive(Clone, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
193 /* Expression-operator symbols. */
208 /* Structural symbols */
224 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
226 /// An opening delimiter (e.g., `{`).
227 OpenDelim(DelimToken),
228 /// A closing delimiter (e.g., `}`).
229 CloseDelim(DelimToken),
234 /// Identifier token.
235 /// Do not forget about `NtIdent` when you want to match on identifiers.
236 /// It's recommended to use `Token::(ident,uninterpolate,uninterpolated_span)` to
237 /// treat regular and interpolated identifiers in the same way.
238 Ident(Symbol, /* is_raw */ bool),
239 /// Lifetime identifier token.
240 /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
241 /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to
242 /// treat regular and interpolated lifetime identifiers in the same way.
245 Interpolated(Lrc<Nonterminal>),
247 /// A doc comment token.
248 /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
249 /// similarly to symbols in string literal tokens.
250 DocComment(CommentKind, ast::AttrStyle, Symbol),
252 // Junk. These carry no data because we don't really care about the data
253 // they *would* carry, and don't really want to allocate a new ident for
254 // them. Instead, users could extract that from the associated span.
260 /// A completely invalid token which should be skipped.
266 // `TokenKind` is used a lot. Make sure it doesn't unintentionally get bigger.
267 #[cfg(target_arch = "x86_64")]
268 rustc_data_structures::static_assert_size!(TokenKind, 16);
270 #[derive(Clone, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
/// Builds a `Literal` token kind from a literal kind, its symbol and an optional suffix.
277 pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
278 Literal(Lit::new(kind, symbol, suffix))
281 // An approximation to proc-macro-style single-character operators used by rustc parser.
282 // If the operator token can be broken into two tokens, the first of which is single-character,
283 // then this function performs that operation, otherwise it returns `None`.
284 pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
290 AndAnd => (BinOp(And), BinOp(And)),
291 OrOr => (BinOp(Or), BinOp(Or)),
// Shifts split into two comparison tokens.
292 BinOp(Shl) => (Lt, Lt),
293 BinOp(Shr) => (Gt, Gt),
// Compound assignments split into the operator followed by `Eq`.
294 BinOpEq(Plus) => (BinOp(Plus), Eq),
295 BinOpEq(Minus) => (BinOp(Minus), Eq),
296 BinOpEq(Star) => (BinOp(Star), Eq),
297 BinOpEq(Slash) => (BinOp(Slash), Eq),
298 BinOpEq(Percent) => (BinOp(Percent), Eq),
299 BinOpEq(Caret) => (BinOp(Caret), Eq),
300 BinOpEq(And) => (BinOp(And), Eq),
301 BinOpEq(Or) => (BinOp(Or), Eq),
// Shift-assignments peel off a single `Lt`/`Gt`, leaving `Le`/`Ge` as the second token.
302 BinOpEq(Shl) => (Lt, Le),
303 BinOpEq(Shr) => (Gt, Ge),
304 DotDot => (Dot, Dot),
// Only the first dot is split off; the remainder stays a single `DotDot` token.
305 DotDotDot => (Dot, DotDot),
306 ModSep => (Colon, Colon),
307 RArrow => (BinOp(Minus), Gt),
308 LArrow => (Lt, BinOp(Minus)),
309 FatArrow => (Eq, Gt),
314 /// Returns tokens that are likely to be typed accidentally instead of the current token.
315 /// Enables better error recovery when the wrong token is found.
316 pub fn similar_tokens(&self) -> Option<Vec<TokenKind>> {
318 Comma => Some(vec![Dot, Lt, Semi]),
319 Semi => Some(vec![Colon, Comma]),
326 pub fn new(kind: TokenKind, span: Span) -> Self {
330 /// Some token that will be thrown away later.
331 pub fn dummy() -> Self {
332 Token::new(TokenKind::Whitespace, DUMMY_SP)
335 /// Recovers a `Token` from an `Ident`. This creates a raw identifier if necessary.
336 pub fn from_ast_ident(ident: Ident) -> Self {
337 Token::new(Ident(ident.name, ident.is_raw_guess()), ident.span)
340 /// Return this token by value and leave a dummy token in its place.
341 pub fn take(&mut self) -> Self {
342 mem::replace(self, Token::dummy())
345 /// For interpolated tokens, returns a span of the fragment to which the interpolated
346 /// token refers. For all other tokens this is just a regular span.
347 /// It is particularly important to use this for identifiers and lifetimes
348 /// for which spans affect name resolution and edition checks.
349 /// Note that keywords are also identifiers, so they should use this
350 /// if they keep spans or perform edition checks.
351 pub fn uninterpolated_span(&self) -> Span {
353 Interpolated(nt) => nt.span(),
/// Reports whether the token is an operator.
///
/// Delimiters, literals, doc comments, identifiers, lifetimes, interpolated tokens,
/// whitespace, comments, shebangs and `Eof` are explicitly not operators.
/// NOTE(review): the arm for the remaining kinds is not visible here; presumably they all
/// count as operators — confirm.
358 pub fn is_op(&self) -> bool {
360 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..)
361 | Lifetime(..) | Interpolated(..) | Whitespace | Comment | Shebang(..) | Eof => false,
/// Returns `true` for `BinOp(Plus)` and `BinOpEq(Plus)` tokens.
/// NOTE(review): the fallback arm is not visible here; presumably every other token
/// yields `false` — confirm.
366 pub fn is_like_plus(&self) -> bool {
368 BinOp(Plus) | BinOpEq(Plus) => true,
373 /// Returns `true` if the token can appear at the start of an expression.
374 pub fn can_begin_expr(&self) -> bool {
// Interpolated identifiers and lifetimes are first turned into regular tokens.
375 match self.uninterpolate().kind {
376 Ident(name, is_raw) =>
// NOTE(review): the kind comes from the uninterpolated token, but the span passed here
// is `self.span` — confirm that this is intended.
377 ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
378 OpenDelim(..) | // tuple, array or block
379 Literal(..) | // literal
380 Not | // operator not
381 BinOp(Minus) | // unary minus
382 BinOp(Star) | // dereference
383 BinOp(Or) | OrOr | // closure
384 BinOp(And) | // reference
385 AndAnd | // double reference
386 // DotDotDot is no longer supported, but we need some way to display the error
387 DotDot | DotDotDot | DotDotEq | // range notation
388 Lt | BinOp(Shl) | // associated path
389 ModSep | // global path
390 Lifetime(..) | // labeled loop
391 Pound => true, // expression attributes
// Any other interpolated fragment is decided by its nonterminal kind.
392 Interpolated(ref nt) => match **nt {
403 /// Returns `true` if the token can appear at the start of a type.
404 pub fn can_begin_type(&self) -> bool {
// Interpolated identifiers and lifetimes are first turned into regular tokens.
405 match self.uninterpolate().kind {
406 Ident(name, is_raw) =>
// NOTE(review): the kind comes from the uninterpolated token, but the span passed here
// is `self.span` — confirm that this is intended.
407 ident_can_begin_type(name, self.span, is_raw), // type name or keyword
408 OpenDelim(Paren) | // tuple
409 OpenDelim(Bracket) | // array
411 BinOp(Star) | // raw pointer
412 BinOp(And) | // reference
413 AndAnd | // double reference
414 Question | // maybe bound in trait object
415 Lifetime(..) | // lifetime bound in trait object
416 Lt | BinOp(Shl) | // associated path
417 ModSep => true, // global path
// Interpolated type and path fragments can begin a type as well.
418 Interpolated(ref nt) => match **nt {
419 NtTy(..) | NtPath(..) => true,
426 /// Returns `true` if the token can appear at the start of a const argument.
427 pub fn can_begin_const_arg(&self) -> bool {
// An opening curly brace is accepted.
429 OpenDelim(Brace) => true,
// Interpolated expressions, blocks and literals are accepted.
430 Interpolated(ref nt) => match **nt {
431 NtExpr(..) | NtBlock(..) | NtLiteral(..) => true,
// Anything else qualifies only if it can begin a (possibly negated) literal.
434 _ => self.can_begin_literal_maybe_minus(),
438 /// Returns `true` if the token can appear at the start of a generic bound.
439 pub fn can_begin_bound(&self) -> bool {
441 || self.is_lifetime()
442 || self.is_keyword(kw::For)
444 || self == &OpenDelim(Paren)
447 /// Returns `true` if the token is any literal
448 pub fn is_lit(&self) -> bool {
455 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
456 /// for example `-42`), or one of the boolean idents.
458 /// In other words, would this token be a valid start of `parse_literal_maybe_minus`?
460 /// Keep this in sync with `Lit::from_token`, excluding unary negation.
461 pub fn can_begin_literal_maybe_minus(&self) -> bool {
462 match self.uninterpolate().kind {
463 Literal(..) | BinOp(Minus) => true,
// Only non-raw identifiers for which `is_bool_lit` holds are accepted.
464 Ident(name, false) if name.is_bool_lit() => true,
465 Interpolated(ref nt) => match &**nt {
466 NtLiteral(_) => true,
// An interpolated expression counts when it is a literal or a negated literal.
467 NtExpr(e) => match &e.kind {
468 ast::ExprKind::Lit(_) => true,
469 ast::ExprKind::Unary(ast::UnOp::Neg, e) => {
470 matches!(&e.kind, ast::ExprKind::Lit(_))
480 // A convenience function for matching on identifiers during parsing.
481 // Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token
482 // into the regular identifier or lifetime token it refers to,
483 // otherwise returns the original token.
484 pub fn uninterpolate(&self) -> Cow<'_, Token> {
486 Interpolated(nt) => match **nt {
// A fresh token is built from the interpolated identifier, hence `Cow::Owned`.
// The new token takes the identifier's own span.
487 NtIdent(ident, is_raw) => {
488 Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span))
490 NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)),
// Other interpolated fragments are handed back unchanged, as a borrow.
491 _ => Cow::Borrowed(self),
// Non-interpolated tokens are handed back unchanged, as a borrow.
493 _ => Cow::Borrowed(self),
497 /// Returns an identifier if this token is an identifier.
498 pub fn ident(&self) -> Option<(Ident, /* is_raw */ bool)> {
499 let token = self.uninterpolate();
501 Ident(name, is_raw) => Some((Ident::new(name, token.span), is_raw)),
506 /// Returns a lifetime identifier if this token is a lifetime.
507 pub fn lifetime(&self) -> Option<Ident> {
508 let token = self.uninterpolate();
510 Lifetime(name) => Some(Ident::new(name, token.span)),
515 /// Returns `true` if the token is an identifier.
516 pub fn is_ident(&self) -> bool {
517 self.ident().is_some()
520 /// Returns `true` if the token is a lifetime.
521 pub fn is_lifetime(&self) -> bool {
522 self.lifetime().is_some()
525 /// Returns `true` if the token is an identifier whose name is the given
527 pub fn is_ident_named(&self, name: Symbol) -> bool {
528 self.ident().map_or(false, |(ident, _)| ident.name == name)
531 /// Returns `true` if the token is an interpolated path.
532 fn is_path(&self) -> bool {
533 if let Interpolated(ref nt) = self.kind {
534 if let NtPath(..) = **nt {
541 /// Would `maybe_whole_expr` in `parser.rs` return `Ok(..)`?
542 /// That is, is this a pre-parsed expression dropped into the token stream
543 /// (which happens while parsing the result of macro expansion)?
544 pub fn is_whole_expr(&self) -> bool {
545 if let Interpolated(ref nt) = self.kind {
546 if let NtExpr(_) | NtLiteral(_) | NtPath(_) | NtIdent(..) | NtBlock(_) = **nt {
554 // Is the token an interpolated block (`$b:block`)?
555 pub fn is_whole_block(&self) -> bool {
556 if let Interpolated(ref nt) = self.kind {
557 if let NtBlock(..) = **nt {
564 /// Returns `true` if the token is either the `mut` or `const` keyword.
565 pub fn is_mutability(&self) -> bool {
566 self.is_keyword(kw::Mut) || self.is_keyword(kw::Const)
/// Returns `true` if the token is `Lt` or `BinOp(Shl)`.
569 pub fn is_qpath_start(&self) -> bool {
570 self == &Lt || self == &BinOp(Shl)
/// Reports whether the token can start a path.
///
/// The visible alternatives are: a qualified-path start (see `is_qpath_start`), a
/// path-segment keyword, or an identifier that is not reserved.
/// NOTE(review): additional alternatives are not visible here — confirm against the
/// complete function body.
573 pub fn is_path_start(&self) -> bool {
575 || self.is_qpath_start()
577 || self.is_path_segment_keyword()
578 || self.is_ident() && !self.is_reserved_ident()
581 /// Returns `true` if the token is a given keyword, `kw`.
582 pub fn is_keyword(&self, kw: Symbol) -> bool {
583 self.is_non_raw_ident_where(|id| id.name == kw)
/// Returns `true` if the token is a non-raw identifier for which
/// `Ident::is_path_segment_keyword` holds.
586 pub fn is_path_segment_keyword(&self) -> bool {
587 self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
590 // Returns true for reserved identifiers used internally for elided lifetimes,
591 // unnamed method parameters, crate root module, error recovery etc.
592 pub fn is_special_ident(&self) -> bool {
593 self.is_non_raw_ident_where(Ident::is_special)
596 /// Returns `true` if the token is a keyword used in the language.
597 pub fn is_used_keyword(&self) -> bool {
598 self.is_non_raw_ident_where(Ident::is_used_keyword)
601 /// Returns `true` if the token is a keyword reserved for possible future use.
602 pub fn is_unused_keyword(&self) -> bool {
603 self.is_non_raw_ident_where(Ident::is_unused_keyword)
606 /// Returns `true` if the token is either a special identifier or a keyword.
607 pub fn is_reserved_ident(&self) -> bool {
608 self.is_non_raw_ident_where(Ident::is_reserved)
611 /// Returns `true` if the token is the identifier `true` or `false`.
612 pub fn is_bool_lit(&self) -> bool {
613 self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
616 /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
617 pub fn is_non_raw_ident_where(&self, pred: impl FnOnce(Ident) -> bool) -> bool {
// The `false` is the `is_raw` flag, so `pred` only ever sees non-raw identifiers.
619 Some((id, false)) => pred(id),
/// Tries to combine this token with the token `joint` into a single token.
///
/// Returns `None` when this token's kind never acts as the first half of a combined
/// token. On success the result's span is `self.span.to(joint.span)`.
/// NOTE(review): the fallback arms of the inner matches are not visible here; presumably
/// pairs without a combined form also yield `None` — confirm.
624 pub fn glue(&self, joint: &Token) -> Option<Token> {
// The combined kind is selected by this token's kind first, then by `joint`'s kind.
625 let kind = match self.kind {
626 Eq => match joint.kind {
631 Lt => match joint.kind {
635 BinOp(Minus) => LArrow,
638 Gt => match joint.kind {
644 Not => match joint.kind {
648 BinOp(op) => match joint.kind {
// These combinations additionally require a specific operator on the left-hand side.
650 BinOp(And) if op == And => AndAnd,
651 BinOp(Or) if op == Or => OrOr,
652 Gt if op == Minus => RArrow,
655 Dot => match joint.kind {
660 DotDot => match joint.kind {
665 Colon => match joint.kind {
669 SingleQuote => match joint.kind {
// A quote followed by a non-raw identifier forms a lifetime whose name is `'` + ident.
670 Ident(name, false) => Lifetime(Symbol::intern(&format!("'{}", name))),
// Kinds that never combine with a following token.
674 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot
675 | DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar
676 | Question | OpenDelim(..) | CloseDelim(..) | Literal(..) | Ident(..)
677 | Lifetime(..) | Interpolated(..) | DocComment(..) | Whitespace | Comment
678 | Shebang(..) | Unknown(..) | Eof => return None,
// The combined token covers both original spans.
681 Some(Token::new(kind, self.span.to(joint.span)))
685 impl PartialEq<TokenKind> for Token {
686 fn eq(&self, rhs: &TokenKind) -> bool {
691 #[derive(Clone, Encodable, Decodable)]
692 /// For interpolation during macro expansion.
693 pub enum Nonterminal {
694 NtItem(P<ast::Item>),
695 NtBlock(P<ast::Block>),
698 NtExpr(P<ast::Expr>),
700 NtIdent(Ident, /* is_raw */ bool),
702 NtLiteral(P<ast::Expr>),
703 /// Stuff inside brackets for attributes
704 NtMeta(P<ast::AttrItem>),
706 NtVis(ast::Visibility),
710 // `Nonterminal` is used a lot. Make sure it doesn't unintentionally get bigger.
711 #[cfg(target_arch = "x86_64")]
712 rustc_data_structures::static_assert_size!(Nonterminal, 40);
714 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable)]
715 pub enum NonterminalKind {
731 impl NonterminalKind {
/// Looks up the `NonterminalKind` that corresponds to a symbol such as `sym::item`
/// or `sym::tt`.
/// NOTE(review): the arm for unrecognized symbols is not visible here; the `Option`
/// return type suggests it yields `None` — confirm.
732 pub fn from_symbol(symbol: Symbol) -> Option<NonterminalKind> {
734 sym::item => NonterminalKind::Item,
735 sym::block => NonterminalKind::Block,
736 sym::stmt => NonterminalKind::Stmt,
737 sym::pat => NonterminalKind::Pat,
738 sym::expr => NonterminalKind::Expr,
739 sym::ty => NonterminalKind::Ty,
740 sym::ident => NonterminalKind::Ident,
741 sym::lifetime => NonterminalKind::Lifetime,
742 sym::literal => NonterminalKind::Literal,
743 sym::meta => NonterminalKind::Meta,
744 sym::path => NonterminalKind::Path,
745 sym::vis => NonterminalKind::Vis,
746 sym::tt => NonterminalKind::TT,
/// Returns the symbol for this kind; the visible arms are the exact reverse of the
/// mapping in `from_symbol`.
750 fn symbol(self) -> Symbol {
752 NonterminalKind::Item => sym::item,
753 NonterminalKind::Block => sym::block,
754 NonterminalKind::Stmt => sym::stmt,
755 NonterminalKind::Pat => sym::pat,
756 NonterminalKind::Expr => sym::expr,
757 NonterminalKind::Ty => sym::ty,
758 NonterminalKind::Ident => sym::ident,
759 NonterminalKind::Lifetime => sym::lifetime,
760 NonterminalKind::Literal => sym::literal,
761 NonterminalKind::Meta => sym::meta,
762 NonterminalKind::Path => sym::path,
763 NonterminalKind::Vis => sym::vis,
764 NonterminalKind::TT => sym::tt,
// A `NonterminalKind` is displayed as the symbol returned by `symbol()`.
769 impl fmt::Display for NonterminalKind {
770 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
771 write!(f, "{}", self.symbol())
/// Returns the span of the fragment held by this nonterminal.
776 fn span(&self) -> Span {
778 NtItem(item) => item.span,
779 NtBlock(block) => block.span,
780 NtStmt(stmt) => stmt.span,
781 NtPat(pat) => pat.span,
// Both variants hold an expression, so they share one arm.
782 NtExpr(expr) | NtLiteral(expr) => expr.span,
784 NtIdent(ident, _) | NtLifetime(ident) => ident.span,
// Attribute items and token trees expose their span through a method, not a field.
785 NtMeta(attr_item) => attr_item.span(),
786 NtPath(path) => path.span,
787 NtVis(vis) => vis.span,
788 NtTT(tt) => tt.span(),
792 /// This nonterminal looks like some specific enums from
793 /// `proc-macro-hack` and `procedural-masquerade` crates.
794 /// We need to maintain some special pretty-printing behavior for them due to incorrect
795 /// asserts in old versions of those crates and their wide use in the ecosystem.
796 /// See issue #73345 for more details.
797 /// FIXME(#73933): Remove this eventually.
798 pub fn pretty_printing_compatibility_hack(&self) -> bool {
// Only item fragments are candidates.
799 if let NtItem(item) = self {
800 let name = item.ident.name;
// The item's name must be one of the two symbols checked below.
801 if name == sym::ProceduralMasqueradeDummyType || name == sym::ProcMacroHack {
802 if let ast::ItemKind::Enum(enum_def, _) = &item.kind {
// The enum must have exactly one variant, and its name must be `sym::Input`.
803 if let [variant] = &*enum_def.variants {
804 return variant.ident.name == sym::Input;
813 impl PartialEq for Nonterminal {
814 fn eq(&self, rhs: &Self) -> bool {
816 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) => {
817 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs
819 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
820 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
821 // FIXME: Assume that all "complex" nonterminals are not equal, we can't compare them
822 // correctly based on data from AST. This will prevent them from matching each other
823 // in macros. The comparison will become possible only when each nonterminal has an
824 // attached token stream from which it was parsed.
// Debug output names the variant only; the payload is always printed as `(..)`.
830 impl fmt::Debug for Nonterminal {
831 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// `pad` is used rather than `write!`, so the formatter's width/alignment options apply.
833 NtItem(..) => f.pad("NtItem(..)"),
834 NtBlock(..) => f.pad("NtBlock(..)"),
835 NtStmt(..) => f.pad("NtStmt(..)"),
836 NtPat(..) => f.pad("NtPat(..)"),
837 NtExpr(..) => f.pad("NtExpr(..)"),
838 NtTy(..) => f.pad("NtTy(..)"),
839 NtIdent(..) => f.pad("NtIdent(..)"),
840 NtLiteral(..) => f.pad("NtLiteral(..)"),
841 NtMeta(..) => f.pad("NtMeta(..)"),
842 NtPath(..) => f.pad("NtPath(..)"),
843 NtTT(..) => f.pad("NtTT(..)"),
844 NtVis(..) => f.pad("NtVis(..)"),
845 NtLifetime(..) => f.pad("NtLifetime(..)"),
// Stable hashing of a `Nonterminal` is never expected to happen: the implementation
// ignores both arguments and panics unconditionally.
850 impl<CTX> HashStable<CTX> for Nonterminal
852 CTX: crate::HashStableContext,
854 fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
855 panic!("interpolated tokens should not be present in the HIR")