3 pub use Nonterminal::*;
9 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
10 use rustc_data_structures::sync::Lrc;
11 use rustc_macros::HashStable_Generic;
12 use rustc_span::symbol::{kw, sym};
13 use rustc_span::symbol::{Ident, Symbol};
14 use rustc_span::{self, edition::Edition, Span, DUMMY_SP};
18 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
19 pub enum CommentKind {
24 #[derive(Clone, PartialEq, Encodable, Decodable, Hash, Debug, Copy)]
25 #[derive(HashStable_Generic)]
39 /// Describes how a sequence of token trees is delimited.
40 /// Cannot use `proc_macro::Delimiter` directly because this
41 /// structure should implement some additional traits.
42 /// The `None` variant is also renamed to `Invisible` to be
43 /// less confusing and better convey the semantics.
44 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
45 #[derive(Encodable, Decodable, Hash, HashStable_Generic)]
54 /// An invisible delimiter, that may, for example, appear around tokens coming from a
55 /// "macro variable" `$var`. It is important to preserve operator priorities in cases like
56 /// `$var * 3` where `$var` is `1 + 2`.
57 /// Invisible delimiters might not survive roundtrip of a token stream through a string.
61 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
63 Bool, // AST only, must never appear in a `Token`
69 StrRaw(u8), // raw string delimited by `n` hash symbols
71 ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
76 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
80 pub suffix: Option<Symbol>,
83 impl fmt::Display for Lit {
84 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85 let Lit { kind, symbol, suffix } = *self;
87 Byte => write!(f, "b'{}'", symbol)?,
88 Char => write!(f, "'{}'", symbol)?,
89 Str => write!(f, "\"{}\"", symbol)?,
92 "r{delim}\"{string}\"{delim}",
93 delim = "#".repeat(n as usize),
96 ByteStr => write!(f, "b\"{}\"", symbol)?,
97 ByteStrRaw(n) => write!(
99 "br{delim}\"{string}\"{delim}",
100 delim = "#".repeat(n as usize),
103 Integer | Float | Bool | Err => write!(f, "{}", symbol)?,
106 if let Some(suffix) = suffix {
107 write!(f, "{}", suffix)?;
115 /// An English article for the literal token kind.
116 pub fn article(self) -> &'static str {
118 Integer | Err => "an",
123 pub fn descr(self) -> &'static str {
125 Bool => panic!("literal token contains `Lit::Bool`"),
128 Integer => "integer",
130 Str | StrRaw(..) => "string",
131 ByteStr | ByteStrRaw(..) => "byte string",
136 crate fn may_have_suffix(self) -> bool {
137 matches!(self, Integer | Float | Err)
142 pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
143 Lit { kind, symbol, suffix }
147 pub fn ident_can_begin_expr(name: Symbol, span: Span, is_raw: bool) -> bool {
148 let ident_token = Token::new(Ident(name, is_raw), span);
150 !ident_token.is_reserved_ident()
151 || ident_token.is_path_segment_keyword()
177 fn ident_can_begin_type(name: Symbol, span: Span, is_raw: bool) -> bool {
178 let ident_token = Token::new(Ident(name, is_raw), span);
180 !ident_token.is_reserved_ident()
181 || ident_token.is_path_segment_keyword()
182 || [kw::Underscore, kw::For, kw::Impl, kw::Fn, kw::Unsafe, kw::Extern, kw::Typeof, kw::Dyn]
186 #[derive(Clone, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
188 /* Expression-operator symbols. */
203 /* Structural symbols */
219 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
221 /// An opening delimiter (e.g., `{`).
222 OpenDelim(Delimiter),
223 /// A closing delimiter (e.g., `}`).
224 CloseDelim(Delimiter),
229 /// Identifier token.
230 /// Do not forget about `NtIdent` when you want to match on identifiers.
231 /// It's recommended to use `Token::(ident,uninterpolate,uninterpolated_span)` to
232 /// treat regular and interpolated identifiers in the same way.
233 Ident(Symbol, /* is_raw */ bool),
234 /// Lifetime identifier token.
235 /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
236 /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to
237 /// treat regular and interpolated lifetime identifiers in the same way.
240 Interpolated(Lrc<Nonterminal>),
242 /// A doc comment token.
243 /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
244 /// similarly to symbols in string literal tokens.
245 DocComment(CommentKind, ast::AttrStyle, Symbol),
250 // `TokenKind` is used a lot. Make sure it doesn't unintentionally get bigger.
251 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
252 rustc_data_structures::static_assert_size!(TokenKind, 16);
254 #[derive(Clone, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
261 pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
262 Literal(Lit::new(kind, symbol, suffix))
265 // An approximation to proc-macro-style single-character operators used by rustc parser.
266 // If the operator token can be broken into two tokens, the first of which is single-character,
267 // then this function performs that operation, otherwise it returns `None`.
268 pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
274 AndAnd => (BinOp(And), BinOp(And)),
275 OrOr => (BinOp(Or), BinOp(Or)),
276 BinOp(Shl) => (Lt, Lt),
277 BinOp(Shr) => (Gt, Gt),
278 BinOpEq(Plus) => (BinOp(Plus), Eq),
279 BinOpEq(Minus) => (BinOp(Minus), Eq),
280 BinOpEq(Star) => (BinOp(Star), Eq),
281 BinOpEq(Slash) => (BinOp(Slash), Eq),
282 BinOpEq(Percent) => (BinOp(Percent), Eq),
283 BinOpEq(Caret) => (BinOp(Caret), Eq),
284 BinOpEq(And) => (BinOp(And), Eq),
285 BinOpEq(Or) => (BinOp(Or), Eq),
286 BinOpEq(Shl) => (Lt, Le),
287 BinOpEq(Shr) => (Gt, Ge),
288 DotDot => (Dot, Dot),
289 DotDotDot => (Dot, DotDot),
290 ModSep => (Colon, Colon),
291 RArrow => (BinOp(Minus), Gt),
292 LArrow => (Lt, BinOp(Minus)),
293 FatArrow => (Eq, Gt),
298 /// Returns tokens that are likely to be typed accidentally instead of the current token.
299 /// Enables better error recovery when the wrong token is found.
300 pub fn similar_tokens(&self) -> Option<Vec<TokenKind>> {
302 Comma => Some(vec![Dot, Lt, Semi]),
303 Semi => Some(vec![Colon, Comma]),
304 FatArrow => Some(vec![Eq, RArrow]),
309 pub fn should_end_const_arg(&self) -> bool {
310 matches!(self, Gt | Ge | BinOp(Shr) | BinOpEq(Shr))
315 pub fn new(kind: TokenKind, span: Span) -> Self {
319 /// Some token that will be thrown away later.
320 pub fn dummy() -> Self {
321 Token::new(TokenKind::Question, DUMMY_SP)
324 /// Recovers a `Token` from an `Ident`. This creates a raw identifier if necessary.
325 pub fn from_ast_ident(ident: Ident) -> Self {
326 Token::new(Ident(ident.name, ident.is_raw_guess()), ident.span)
329 /// Return this token by value and leave a dummy token in its place.
330 pub fn take(&mut self) -> Self {
331 mem::replace(self, Token::dummy())
334 /// For interpolated tokens, returns a span of the fragment to which the interpolated
335 /// token refers. For all other tokens this is just a regular span.
336 /// It is particularly important to use this for identifiers and lifetimes
337 /// for which spans affect name resolution and edition checks.
338 /// Note that keywords are also identifiers, so they should use this
339 /// if they keep spans or perform edition checks.
340 pub fn uninterpolated_span(&self) -> Span {
342 Interpolated(nt) => nt.span(),
347 pub fn is_op(&self) -> bool {
361 pub fn is_like_plus(&self) -> bool {
362 matches!(self.kind, BinOp(Plus) | BinOpEq(Plus))
365 /// Returns `true` if the token can appear at the start of an expression.
366 pub fn can_begin_expr(&self) -> bool {
367 match self.uninterpolate().kind {
368 Ident(name, is_raw) =>
369 ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
370 OpenDelim(..) | // tuple, array or block
371 Literal(..) | // literal
372 Not | // operator not
373 BinOp(Minus) | // unary minus
374 BinOp(Star) | // dereference
375 BinOp(Or) | OrOr | // closure
376 BinOp(And) | // reference
377 AndAnd | // double reference
378 // DotDotDot is no longer supported, but we need some way to display the error
379 DotDot | DotDotDot | DotDotEq | // range notation
380 Lt | BinOp(Shl) | // associated path
381 ModSep | // global path
382 Lifetime(..) | // labeled loop
383 Pound => true, // expression attributes
384 Interpolated(ref nt) => matches!(**nt, NtLiteral(..) |
392 /// Returns `true` if the token can appear at the start of a type.
393 pub fn can_begin_type(&self) -> bool {
394 match self.uninterpolate().kind {
395 Ident(name, is_raw) =>
396 ident_can_begin_type(name, self.span, is_raw), // type name or keyword
397 OpenDelim(Delimiter::Parenthesis) | // tuple
398 OpenDelim(Delimiter::Bracket) | // array
400 BinOp(Star) | // raw pointer
401 BinOp(And) | // reference
402 AndAnd | // double reference
403 Question | // maybe bound in trait object
404 Lifetime(..) | // lifetime bound in trait object
405 Lt | BinOp(Shl) | // associated path
406 ModSep => true, // global path
407 Interpolated(ref nt) => matches!(**nt, NtTy(..) | NtPath(..)),
412 /// Returns `true` if the token can appear at the start of a const param.
413 pub fn can_begin_const_arg(&self) -> bool {
415 OpenDelim(Delimiter::Brace) => true,
416 Interpolated(ref nt) => matches!(**nt, NtExpr(..) | NtBlock(..) | NtLiteral(..)),
417 _ => self.can_begin_literal_maybe_minus(),
421 /// Returns `true` if the token can appear at the start of a generic bound.
422 pub fn can_begin_bound(&self) -> bool {
424 || self.is_lifetime()
425 || self.is_keyword(kw::For)
427 || self == &OpenDelim(Delimiter::Parenthesis)
430 /// Returns `true` if the token is any literal.
431 pub fn is_lit(&self) -> bool {
432 matches!(self.kind, Literal(..))
435 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
436 /// for example a '-42'), or one of the boolean idents.
438 /// In other words, would this token be a valid start of `parse_literal_maybe_minus`?
440 /// Keep this in sync with `Lit::from_token`, excluding unary negation.
441 pub fn can_begin_literal_maybe_minus(&self) -> bool {
442 match self.uninterpolate().kind {
443 Literal(..) | BinOp(Minus) => true,
444 Ident(name, false) if name.is_bool_lit() => true,
445 Interpolated(ref nt) => match &**nt {
446 NtLiteral(_) => true,
447 NtExpr(e) => match &e.kind {
448 ast::ExprKind::Lit(_) => true,
449 ast::ExprKind::Unary(ast::UnOp::Neg, e) => {
450 matches!(&e.kind, ast::ExprKind::Lit(_))
460 // A convenience function for matching on identifiers during parsing.
461 // Turns an interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token
462 // into the regular identifier or lifetime token it refers to;
463 // otherwise returns the original token.
464 pub fn uninterpolate(&self) -> Cow<'_, Token> {
466 Interpolated(nt) => match **nt {
467 NtIdent(ident, is_raw) => {
468 Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span))
470 NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)),
471 _ => Cow::Borrowed(self),
473 _ => Cow::Borrowed(self),
477 /// Returns an identifier if this token is an identifier.
478 pub fn ident(&self) -> Option<(Ident, /* is_raw */ bool)> {
479 let token = self.uninterpolate();
481 Ident(name, is_raw) => Some((Ident::new(name, token.span), is_raw)),
486 /// Returns a lifetime identifier if this token is a lifetime.
487 pub fn lifetime(&self) -> Option<Ident> {
488 let token = self.uninterpolate();
490 Lifetime(name) => Some(Ident::new(name, token.span)),
495 /// Returns `true` if the token is an identifier.
496 pub fn is_ident(&self) -> bool {
497 self.ident().is_some()
500 /// Returns `true` if the token is a lifetime.
501 pub fn is_lifetime(&self) -> bool {
502 self.lifetime().is_some()
505 /// Returns `true` if the token is an identifier whose name is the given
507 pub fn is_ident_named(&self, name: Symbol) -> bool {
508 self.ident().map_or(false, |(ident, _)| ident.name == name)
511 /// Returns `true` if the token is an interpolated path.
512 fn is_path(&self) -> bool {
513 if let Interpolated(ref nt) = self.kind && let NtPath(..) = **nt {
519 /// Would `maybe_whole_expr` in `parser.rs` return `Ok(..)`?
520 /// That is, is this a pre-parsed expression dropped into the token stream
521 /// (which happens while parsing the result of macro expansion)?
522 pub fn is_whole_expr(&self) -> bool {
523 if let Interpolated(ref nt) = self.kind
524 && let NtExpr(_) | NtLiteral(_) | NtPath(_) | NtIdent(..) | NtBlock(_) = **nt
532 // Is the token an interpolated block (`$b:block`)?
533 pub fn is_whole_block(&self) -> bool {
534 if let Interpolated(ref nt) = self.kind && let NtBlock(..) = **nt {
540 /// Returns `true` if the token is either the `mut` or `const` keyword.
541 pub fn is_mutability(&self) -> bool {
542 self.is_keyword(kw::Mut) || self.is_keyword(kw::Const)
545 pub fn is_qpath_start(&self) -> bool {
546 self == &Lt || self == &BinOp(Shl)
549 pub fn is_path_start(&self) -> bool {
551 || self.is_qpath_start()
553 || self.is_path_segment_keyword()
554 || self.is_ident() && !self.is_reserved_ident()
557 /// Returns `true` if the token is a given keyword, `kw`.
558 pub fn is_keyword(&self, kw: Symbol) -> bool {
559 self.is_non_raw_ident_where(|id| id.name == kw)
562 pub fn is_path_segment_keyword(&self) -> bool {
563 self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
566 // Returns true for reserved identifiers used internally for elided lifetimes,
567 // unnamed method parameters, crate root module, error recovery etc.
568 pub fn is_special_ident(&self) -> bool {
569 self.is_non_raw_ident_where(Ident::is_special)
572 /// Returns `true` if the token is a keyword used in the language.
573 pub fn is_used_keyword(&self) -> bool {
574 self.is_non_raw_ident_where(Ident::is_used_keyword)
577 /// Returns `true` if the token is a keyword reserved for possible future use.
578 pub fn is_unused_keyword(&self) -> bool {
579 self.is_non_raw_ident_where(Ident::is_unused_keyword)
582 /// Returns `true` if the token is either a special identifier or a keyword.
583 pub fn is_reserved_ident(&self) -> bool {
584 self.is_non_raw_ident_where(Ident::is_reserved)
587 /// Returns `true` if the token is the identifier `true` or `false`.
588 pub fn is_bool_lit(&self) -> bool {
589 self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
592 pub fn is_numeric_lit(&self) -> bool {
595 Literal(Lit { kind: LitKind::Integer, .. }) | Literal(Lit { kind: LitKind::Float, .. })
599 /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
600 pub fn is_non_raw_ident_where(&self, pred: impl FnOnce(Ident) -> bool) -> bool {
602 Some((id, false)) => pred(id),
607 pub fn glue(&self, joint: &Token) -> Option<Token> {
608 let kind = match self.kind {
609 Eq => match joint.kind {
614 Lt => match joint.kind {
618 BinOp(Minus) => LArrow,
621 Gt => match joint.kind {
627 Not => match joint.kind {
631 BinOp(op) => match joint.kind {
633 BinOp(And) if op == And => AndAnd,
634 BinOp(Or) if op == Or => OrOr,
635 Gt if op == Minus => RArrow,
638 Dot => match joint.kind {
643 DotDot => match joint.kind {
648 Colon => match joint.kind {
652 SingleQuote => match joint.kind {
653 Ident(name, false) => Lifetime(Symbol::intern(&format!("'{}", name))),
657 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot
658 | DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar
659 | Question | OpenDelim(..) | CloseDelim(..) | Literal(..) | Ident(..)
660 | Lifetime(..) | Interpolated(..) | DocComment(..) | Eof => return None,
663 Some(Token::new(kind, self.span.to(joint.span)))
667 impl PartialEq<TokenKind> for Token {
668 fn eq(&self, rhs: &TokenKind) -> bool {
673 #[derive(Clone, Encodable, Decodable)]
674 /// For interpolation during macro expansion.
675 pub enum Nonterminal {
676 NtItem(P<ast::Item>),
677 NtBlock(P<ast::Block>),
678 NtStmt(P<ast::Stmt>),
680 NtExpr(P<ast::Expr>),
682 NtIdent(Ident, /* is_raw */ bool),
684 NtLiteral(P<ast::Expr>),
685 /// Stuff inside brackets for attributes
686 NtMeta(P<ast::AttrItem>),
687 NtPath(P<ast::Path>),
688 NtVis(P<ast::Visibility>),
691 // `Nonterminal` is used a lot. Make sure it doesn't unintentionally get bigger.
692 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
693 rustc_data_structures::static_assert_size!(Nonterminal, 16);
695 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable)]
696 pub enum NonterminalKind {
701 /// Keep track of whether the user used `:pat_param` or `:pat` and we inferred it from the
702 /// edition of the span. This is used for diagnostics.
717 impl NonterminalKind {
718 /// The `edition` closure is used to get the edition for the given symbol. Doing
719 /// `span.edition()` is expensive, so we do it lazily.
722 edition: impl FnOnce() -> Edition,
723 ) -> Option<NonterminalKind> {
725 sym::item => NonterminalKind::Item,
726 sym::block => NonterminalKind::Block,
727 sym::stmt => NonterminalKind::Stmt,
728 sym::pat => match edition() {
729 Edition::Edition2015 | Edition::Edition2018 => {
730 NonterminalKind::PatParam { inferred: true }
732 Edition::Edition2021 | Edition::Edition2024 => NonterminalKind::PatWithOr,
734 sym::pat_param => NonterminalKind::PatParam { inferred: false },
735 sym::expr => NonterminalKind::Expr,
736 sym::ty => NonterminalKind::Ty,
737 sym::ident => NonterminalKind::Ident,
738 sym::lifetime => NonterminalKind::Lifetime,
739 sym::literal => NonterminalKind::Literal,
740 sym::meta => NonterminalKind::Meta,
741 sym::path => NonterminalKind::Path,
742 sym::vis => NonterminalKind::Vis,
743 sym::tt => NonterminalKind::TT,
747 fn symbol(self) -> Symbol {
749 NonterminalKind::Item => sym::item,
750 NonterminalKind::Block => sym::block,
751 NonterminalKind::Stmt => sym::stmt,
752 NonterminalKind::PatParam { inferred: false } => sym::pat_param,
753 NonterminalKind::PatParam { inferred: true } | NonterminalKind::PatWithOr => sym::pat,
754 NonterminalKind::Expr => sym::expr,
755 NonterminalKind::Ty => sym::ty,
756 NonterminalKind::Ident => sym::ident,
757 NonterminalKind::Lifetime => sym::lifetime,
758 NonterminalKind::Literal => sym::literal,
759 NonterminalKind::Meta => sym::meta,
760 NonterminalKind::Path => sym::path,
761 NonterminalKind::Vis => sym::vis,
762 NonterminalKind::TT => sym::tt,
767 impl fmt::Display for NonterminalKind {
768 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
769 write!(f, "{}", self.symbol())
774 pub fn span(&self) -> Span {
776 NtItem(item) => item.span,
777 NtBlock(block) => block.span,
778 NtStmt(stmt) => stmt.span,
779 NtPat(pat) => pat.span,
780 NtExpr(expr) | NtLiteral(expr) => expr.span,
782 NtIdent(ident, _) | NtLifetime(ident) => ident.span,
783 NtMeta(attr_item) => attr_item.span(),
784 NtPath(path) => path.span,
785 NtVis(vis) => vis.span,
790 impl PartialEq for Nonterminal {
791 fn eq(&self, rhs: &Self) -> bool {
793 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) => {
794 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs
796 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
797 // FIXME: Assume that all "complex" nonterminals are not equal; we can't compare them
798 // correctly based on data from AST. This will prevent them from matching each other
799 // in macros. The comparison will become possible only when each nonterminal has an
800 // attached token stream from which it was parsed.
806 impl fmt::Debug for Nonterminal {
807 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
809 NtItem(..) => f.pad("NtItem(..)"),
810 NtBlock(..) => f.pad("NtBlock(..)"),
811 NtStmt(..) => f.pad("NtStmt(..)"),
812 NtPat(..) => f.pad("NtPat(..)"),
813 NtExpr(..) => f.pad("NtExpr(..)"),
814 NtTy(..) => f.pad("NtTy(..)"),
815 NtIdent(..) => f.pad("NtIdent(..)"),
816 NtLiteral(..) => f.pad("NtLiteral(..)"),
817 NtMeta(..) => f.pad("NtMeta(..)"),
818 NtPath(..) => f.pad("NtPath(..)"),
819 NtVis(..) => f.pad("NtVis(..)"),
820 NtLifetime(..) => f.pad("NtLifetime(..)"),
825 impl<CTX> HashStable<CTX> for Nonterminal
827 CTX: crate::HashStableContext,
829 fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
830 panic!("interpolated tokens should not be present in the HIR")