2 pub use Nonterminal::*;
7 use crate::ast::{self};
8 use crate::parse::{parse_stream_from_source_str, ParseSess};
9 use crate::print::pprust;
11 use crate::symbol::kw;
12 use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
14 use syntax_pos::symbol::Symbol;
15 use syntax_pos::{self, Span, FileName, DUMMY_SP};
20 #[cfg(target_arch = "x86_64")]
21 use rustc_data_structures::static_assert_size;
22 use rustc_data_structures::sync::Lrc;
24 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
// NOTE(review): the enum this derive attaches to (presumably `BinOpToken`) is
// elided from this view — confirm against the full file.
38 /// A delimiter token.
39 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
41 /// A round parenthesis (i.e., `(` or `)`).
43 /// A square bracket (i.e., `[` or `]`).
45 /// A curly brace (i.e., `{` or `}`).
47 /// An empty delimiter.
// Width of one side of the delimiter in source characters:
// `NoDelim` is invisible (0); every real delimiter is a single character.
52 pub fn len(self) -> usize {
53 if self == NoDelim { 0 } else { 1 }
// `true` only for the zero-length `NoDelim` case (body elided in this view).
56 pub fn is_empty(self) -> bool {
// Kinds of literal tokens. The `u16` payload on the raw-string kinds records
// how many `#` symbols delimit the raw string.
61 #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)]
63 Bool, // AST only, must never appear in a `Token`
69 StrRaw(u16), // raw string delimited by `n` hash symbols
71 ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols
// A literal token: a kind, the uninterpreted source text as a `Symbol`, and an
// optional suffix (e.g. the `u8` in `1u8`).
76 #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)]
80 pub suffix: Option<Symbol>,
83 impl fmt::Display for Lit {
// Re-renders the literal as it would appear in source: quotes, raw-string
// hash delimiters, `b`/`r`/`br` prefixes, and the trailing suffix if present.
84 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85 let Lit { kind, symbol, suffix } = *self;
87 Byte => write!(f, "b'{}'", symbol)?,
88 Char => write!(f, "'{}'", symbol)?,
89 Str => write!(f, "\"{}\"", symbol)?,
// Raw strings repeat the `#` delimiter `n` times on both sides.
90 StrRaw(n) => write!(f, "r{delim}\"{string}\"{delim}",
91 delim="#".repeat(n as usize),
93 ByteStr => write!(f, "b\"{}\"", symbol)?,
94 ByteStrRaw(n) => write!(f, "br{delim}\"{string}\"{delim}",
95 delim="#".repeat(n as usize),
// `Err` literals print their raw text unchanged so error recovery keeps the
// user's original spelling.
100 Err => write!(f, "{}", symbol)?,
// Suffix is appended with no separator, e.g. `1` + `u32` -> `1u32`.
103 if let Some(suffix) = suffix {
104 write!(f, "{}", suffix)?;
112 /// An English article for the literal token kind.
113 crate fn article(self) -> &'static str {
// "an" precedes the vowel-initial descriptions ("integer", "error");
// the remaining arm (elided here) presumably returns "a" — TODO confirm.
115 Integer | Err => "an",
// Human-readable noun for diagnostics; pairs with `article` above.
120 crate fn descr(self) -> &'static str {
// `Bool` exists only in the AST; reaching it in token position is a bug.
122 Bool => panic!("literal token contains `Lit::Bool`"),
125 Integer => "integer",
127 Str | StrRaw(..) => "string",
128 ByteStr | ByteStrRaw(..) => "byte string",
// Only numeric and error literals may legally carry a suffix (`1u8`, `2f32`).
133 crate fn may_have_suffix(self) -> bool {
135 Integer | Float | Err => true,
// Plain field constructor; performs no validation of `symbol` against `kind`.
142 pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
143 Lit { kind, symbol, suffix }
// Returns `true` if an identifier token with this name/rawness could start an
// expression: any non-reserved ident, or one of the reserved keywords that
// legally begin expressions (the keyword list is elided from this view).
147 pub(crate) fn ident_can_begin_expr(name: ast::Name, span: Span, is_raw: bool) -> bool {
148 let ident_token = Token::new(Ident(name, is_raw), span);
150 !ident_token.is_reserved_ident() ||
151 ident_token.is_path_segment_keyword() ||
155 // FIXME: remove when `await!(..)` syntax is removed
156 // https://github.com/rust-lang/rust/issues/60610
// As `ident_can_begin_expr`, but for type position: a type name or one of the
// keywords allowed at the start of a type (elided keyword list follows).
179 fn ident_can_begin_type(name: ast::Name, span: Span, is_raw: bool) -> bool {
180 let ident_token = Token::new(Ident(name, is_raw), span);
182 !ident_token.is_reserved_ident() ||
183 ident_token.is_path_segment_keyword() ||
196 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)]
198 /* Expression-operator symbols. */
213 /* Structural symbols */
229 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
231 /// An opening delimiter (e.g., `{`).
232 OpenDelim(DelimToken),
233 /// A closing delimiter (e.g., `}`).
234 CloseDelim(DelimToken),
239 /* Name components */
240 Ident(ast::Name, /* is_raw */ bool),
// An already-parsed AST fragment dropped into the stream by macro expansion;
// boxed in an `Lrc` so cloning a `TokenKind` stays cheap.
243 Interpolated(Lrc<Nonterminal>),
245 // Can be expanded into several tokens.
247 DocComment(ast::Name),
249 // Junk. These carry no data because we don't really care about the data
250 // they *would* carry, and don't really want to allocate a new ident for
251 // them. Instead, users could extract that from the associated span.
258 /// A completely invalid token which should be skipped.
264 // `TokenKind` is used a lot. Make sure it doesn't unintentionally get bigger.
265 #[cfg(target_arch = "x86_64")]
266 static_assert_size!(TokenKind, 16);
// NOTE(review): this derive presumably attaches to the `Token` struct
// (kind + span), whose declaration is elided from this view.
268 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)]
// Convenience constructor wrapping `Lit::new` into a `Literal` token kind.
275 pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
276 Literal(Lit::new(kind, symbol, suffix))
279 /// Returns tokens that are likely to be typed accidentally instead of the current token.
280 /// Enables better error recovery when the wrong token is found.
281 crate fn similar_tokens(&self) -> Option<Vec<TokenKind>> {
283 Comma => Some(vec![Dot, Lt, Semi]),
284 Semi => Some(vec![Colon, Comma]),
// Primary constructor pairing a `TokenKind` with its source span.
291 crate fn new(kind: TokenKind, span: Span) -> Self {
295 /// Some token that will be thrown away later.
296 crate fn dummy() -> Self {
297 Token::new(TokenKind::Whitespace, DUMMY_SP)
300 /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
301 crate fn from_ast_ident(ident: ast::Ident) -> Self {
302 Token::new(Ident(ident.name, ident.is_raw_guess()), ident.span)
305 /// Return this token by value and leave a dummy token in its place.
306 crate fn take(&mut self) -> Self {
307 mem::replace(self, Token::dummy())
// `true` for operator-like tokens: everything except the explicitly listed
// non-operators (delimiters, literals, idents, trivia, etc.) below.
310 crate fn is_op(&self) -> bool {
312 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
313 Ident(..) | Lifetime(..) | Interpolated(..) |
314 Whitespace | Comment | Shebang(..) | Eof => false,
// `+` in either its binary (`+`) or compound-assignment (`+=`) form; used by
// parser recovery for `+`-related diagnostics.
319 crate fn is_like_plus(&self) -> bool {
321 BinOp(Plus) | BinOpEq(Plus) => true,
326 /// Returns `true` if the token can appear at the start of an expression.
327 crate fn can_begin_expr(&self) -> bool {
329 Ident(name, is_raw) =>
330 ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
331 OpenDelim(..) | // tuple, array or block
332 Literal(..) | // literal
333 Not | // operator not
334 BinOp(Minus) | // unary minus
335 BinOp(Star) | // dereference
336 BinOp(Or) | OrOr | // closure
337 BinOp(And) | // reference
338 AndAnd | // double reference
339 // DotDotDot is no longer supported, but we need some way to display the error
340 DotDot | DotDotDot | DotDotEq | // range notation
341 Lt | BinOp(Shl) | // associated path
342 ModSep | // global path
343 Lifetime(..) | // labeled loop
344 Pound => true, // expression attributes
// Interpolated fragments qualify when the wrapped AST node is itself
// expression-like (other qualifying variants are elided from this view).
345 Interpolated(ref nt) => match **nt {
351 NtLifetime(..) => true,
358 /// Returns `true` if the token can appear at the start of a type.
359 crate fn can_begin_type(&self) -> bool {
361 Ident(name, is_raw) =>
362 ident_can_begin_type(name, self.span, is_raw), // type name or keyword
363 OpenDelim(Paren) | // tuple
364 OpenDelim(Bracket) | // array
366 BinOp(Star) | // raw pointer
367 BinOp(And) | // reference
368 AndAnd | // double reference
369 Question | // maybe bound in trait object
370 Lifetime(..) | // lifetime bound in trait object
371 Lt | BinOp(Shl) | // associated path
372 ModSep => true, // global path
373 Interpolated(ref nt) => match **nt {
374 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
381 /// Returns `true` if the token can appear at the start of a const param.
382 crate fn can_begin_const_arg(&self) -> bool {
// Const arguments are either a braced block, an interpolated fragment, or
// anything that can begin a literal/bool (including a leading minus).
384 OpenDelim(Brace) => true,
385 Interpolated(ref nt) => match **nt {
388 NtLiteral(..) => true,
391 _ => self.can_begin_literal_or_bool(),
395 /// Returns `true` if the token can appear at the start of a generic bound.
396 crate fn can_begin_bound(&self) -> bool {
397 self.is_path_start() || self.is_lifetime() || self.is_keyword(kw::For) ||
398 self == &Question || self == &OpenDelim(Paren)
401 /// Returns `true` if the token is any literal
402 crate fn is_lit(&self) -> bool {
// Extracts the `Lit` payload; callers must check `is_lit` first, otherwise
// this panics (invariant violation, not user error).
409 crate fn expect_lit(&self) -> Lit {
412 _ => panic!("`expect_lit` called on non-literal"),
416 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
417 /// for example a '-42', or one of the boolean idents).
418 crate fn can_begin_literal_or_bool(&self) -> bool {
420 Literal(..) | BinOp(Minus) => true,
// `true`/`false` are idents, not literal tokens; raw `r#true` is excluded.
421 Ident(name, false) if name.is_bool_lit() => true,
422 Interpolated(ref nt) => match **nt {
423 NtLiteral(..) => true,
430 /// Returns an identifier if this token is an identifier.
431 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
433 Ident(name, is_raw) => Some((ast::Ident::new(name, self.span), is_raw)),
// An interpolated `NtIdent` also counts — it carries its own span.
434 Interpolated(ref nt) => match **nt {
435 NtIdent(ident, is_raw) => Some((ident, is_raw)),
442 /// Returns a lifetime identifier if this token is a lifetime.
443 pub fn lifetime(&self) -> Option<ast::Ident> {
445 Lifetime(name) => Some(ast::Ident::new(name, self.span)),
446 Interpolated(ref nt) => match **nt {
447 NtLifetime(ident) => Some(ident),
454 /// Returns `true` if the token is an identifier.
455 pub fn is_ident(&self) -> bool {
456 self.ident().is_some()
459 /// Returns `true` if the token is a lifetime.
460 crate fn is_lifetime(&self) -> bool {
461 self.lifetime().is_some()
464 /// Returns `true` if the token is a identifier whose name is the given
466 crate fn is_ident_named(&self, name: Symbol) -> bool {
467 self.ident().map_or(false, |(ident, _)| ident.name == name)
470 /// Returns `true` if the token is an interpolated path.
// Note: despite the name, this only matches *interpolated* `NtPath`
// fragments, not path-like token sequences.
471 fn is_path(&self) -> bool {
472 if let Interpolated(ref nt) = self.kind {
473 if let NtPath(..) = **nt {
480 /// Would `maybe_whole_expr` in `parser.rs` return `Ok(..)`?
481 /// That is, is this a pre-parsed expression dropped into the token stream
482 /// (which happens while parsing the result of macro expansion)?
483 crate fn is_whole_expr(&self) -> bool {
484 if let Interpolated(ref nt) = self.kind {
485 if let NtExpr(_) | NtLiteral(_) | NtPath(_) | NtIdent(..) | NtBlock(_) = **nt {
493 /// Returns `true` if the token is either the `mut` or `const` keyword.
494 crate fn is_mutability(&self) -> bool {
495 self.is_keyword(kw::Mut) ||
496 self.is_keyword(kw::Const)
// Qualified paths start with `<` — or `<<`, because the lexer may have glued
// two adjacent `<` tokens into a shift operator.
499 crate fn is_qpath_start(&self) -> bool {
500 self == &Lt || self == &BinOp(Shl)
503 crate fn is_path_start(&self) -> bool {
504 self == &ModSep || self.is_qpath_start() || self.is_path() ||
505 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
508 /// Returns `true` if the token is a given keyword, `kw`.
509 pub fn is_keyword(&self, kw: Symbol) -> bool {
510 self.is_non_raw_ident_where(|id| id.name == kw)
513 crate fn is_path_segment_keyword(&self) -> bool {
514 self.is_non_raw_ident_where(ast::Ident::is_path_segment_keyword)
517 // Returns true for reserved identifiers used internally for elided lifetimes,
518 // unnamed method parameters, crate root module, error recovery etc.
519 crate fn is_special_ident(&self) -> bool {
520 self.is_non_raw_ident_where(ast::Ident::is_special)
523 /// Returns `true` if the token is a keyword used in the language.
524 crate fn is_used_keyword(&self) -> bool {
525 self.is_non_raw_ident_where(ast::Ident::is_used_keyword)
528 /// Returns `true` if the token is a keyword reserved for possible future use.
529 crate fn is_unused_keyword(&self) -> bool {
530 self.is_non_raw_ident_where(ast::Ident::is_unused_keyword)
533 /// Returns `true` if the token is either a special identifier or a keyword.
534 pub fn is_reserved_ident(&self) -> bool {
535 self.is_non_raw_ident_where(ast::Ident::is_reserved)
538 /// Returns `true` if the token is the identifier `true` or `false`.
539 crate fn is_bool_lit(&self) -> bool {
540 self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
543 /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
// Shared helper backing all the predicates above; raw idents (`r#foo`) never
// match, so `r#match` is an ordinary identifier rather than a keyword.
544 fn is_non_raw_ident_where(&self, pred: impl FnOnce(ast::Ident) -> bool) -> bool {
546 Some((id, false)) => pred(id),
// Attempts to merge `self` with the immediately following token `joint` into
// a single multi-character token (e.g. `=` + `=` -> `==`, `<` + `-` -> `<-`).
// Returns `None` when the pair does not form a joined token. Used when
// re-gluing proc-macro token streams.
551 crate fn glue(&self, joint: &Token) -> Option<Token> {
552 let kind = match self.kind {
553 Eq => match joint.kind {
558 Lt => match joint.kind {
562 BinOp(Minus) => LArrow,
565 Gt => match joint.kind {
571 Not => match joint.kind {
575 BinOp(op) => match joint.kind {
577 BinOp(And) if op == And => AndAnd,
578 BinOp(Or) if op == Or => OrOr,
579 Gt if op == Minus => RArrow,
582 Dot => match joint.kind {
587 DotDot => match joint.kind {
592 Colon => match joint.kind {
// `'` + ident forms a lifetime token; the `'` prefix is re-added here.
596 SingleQuote => match joint.kind {
597 Ident(name, false) => Lifetime(Symbol::intern(&format!("'{}", name))),
// Everything else can never be the first half of a glued token.
601 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot |
602 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
603 Question | OpenDelim(..) | CloseDelim(..) |
604 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
605 Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None,
// The merged token spans from the start of `self` to the end of `joint`.
608 Some(Token::new(kind, self.span.to(joint.span)))
611 // See comments in `Nonterminal::to_tokenstream` for why we care about
612 // *probably* equal here rather than actual equality
// Span-insensitive, "close enough" token equality used to validate cached
// token streams against re-stringified ones.
613 crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
// Fast path: different variants can never be equal.
614 if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) {
617 match (&self.kind, &other.kind) {
632 (&DotDotDot, &DotDotDot) |
633 (&DotDotEq, &DotDotEq) |
640 (&FatArrow, &FatArrow) |
643 (&Question, &Question) |
644 (&Whitespace, &Whitespace) |
645 (&Comment, &Comment) |
646 (&Eof, &Eof) => true,
648 (&BinOp(a), &BinOp(b)) |
649 (&BinOpEq(a), &BinOpEq(b)) => a == b,
651 (&OpenDelim(a), &OpenDelim(b)) |
652 (&CloseDelim(a), &CloseDelim(b)) => a == b,
654 (&DocComment(a), &DocComment(b)) |
655 (&Shebang(a), &Shebang(b)) => a == b,
657 (&Literal(a), &Literal(b)) => a == b,
659 (&Lifetime(a), &Lifetime(b)) => a == b,
// `$crate` stringifies differently depending on context, so either side
// being `$crate` is accepted as a match.
660 (&Ident(a, b), &Ident(c, d)) => b == d && (a == c ||
661 a == kw::DollarCrate ||
662 c == kw::DollarCrate),
// Interpolated fragments are conservatively treated as never equal.
664 (&Interpolated(_), &Interpolated(_)) => false,
// Exhaustiveness backstop: a new `TokenKind` variant must be added here too.
666 _ => panic!("forgot to add a token?"),
// Lets a `Token` be compared directly against a bare `TokenKind`
// (comparison body elided in this view; presumably ignores the span).
671 impl PartialEq<TokenKind> for Token {
672 fn eq(&self, rhs: &TokenKind) -> bool {
677 #[derive(Clone, RustcEncodable, RustcDecodable)]
678 /// For interpolation during macro expansion.
679 pub enum Nonterminal {
680 NtItem(P<ast::Item>),
681 NtBlock(P<ast::Block>),
684 NtExpr(P<ast::Expr>),
686 NtIdent(ast::Ident, /* is_raw */ bool),
687 NtLifetime(ast::Ident),
688 NtLiteral(P<ast::Expr>),
689 /// Stuff inside brackets for attributes
690 NtMeta(ast::AttrItem),
692 NtVis(ast::Visibility),
694 // Used only for passing items to proc macro attributes (they are not
695 // strictly necessary for that, `Annotatable` can be converted into
696 // tokens directly, but doing that naively regresses pretty-printing).
697 NtTraitItem(ast::TraitItem),
698 NtImplItem(ast::ImplItem),
699 NtForeignItem(ast::ForeignItem),
702 impl PartialEq for Nonterminal {
// Only the "simple" variants (ident, lifetime, token tree) support real
// equality; everything else falls through to the FIXME below.
703 fn eq(&self, rhs: &Self) -> bool {
705 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
706 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
707 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
708 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
709 // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
710 // correctly based on data from AST. This will prevent them from matching each other
711 // in macros. The comparison will become possible only when each nonterminal has an
712 // attached token stream from which it was parsed.
718 impl fmt::Debug for Nonterminal {
// Prints only the variant name; the wrapped AST payload is elided to keep
// debug output compact.
719 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
721 NtItem(..) => f.pad("NtItem(..)"),
722 NtBlock(..) => f.pad("NtBlock(..)"),
723 NtStmt(..) => f.pad("NtStmt(..)"),
724 NtPat(..) => f.pad("NtPat(..)"),
725 NtExpr(..) => f.pad("NtExpr(..)"),
726 NtTy(..) => f.pad("NtTy(..)"),
727 NtIdent(..) => f.pad("NtIdent(..)"),
728 NtLiteral(..) => f.pad("NtLiteral(..)"),
729 NtMeta(..) => f.pad("NtMeta(..)"),
730 NtPath(..) => f.pad("NtPath(..)"),
731 NtTT(..) => f.pad("NtTT(..)"),
732 NtImplItem(..) => f.pad("NtImplItem(..)"),
733 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
734 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
735 NtVis(..) => f.pad("NtVis(..)"),
736 NtLifetime(..) => f.pad("NtLifetime(..)"),
// Converts a parsed AST fragment back into a `TokenStream`, preferring the
// lossless cached tokens when they still agree with a fresh stringify+reparse.
742 pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
743 // A `Nonterminal` is often a parsed AST item. At this point we now
744 // need to convert the parsed AST to an actual token stream, e.g.
745 // un-parse it basically.
747 // Unfortunately there's not really a great way to do that in a
748 // guaranteed lossless fashion right now. The fallback here is to just
749 // stringify the AST node and reparse it, but this loses all span
752 // As a result, some AST nodes are annotated with the token stream they
753 // came from. Here we attempt to extract these lossless token streams
754 // before we fall back to the stringification.
755 let tokens = match *self {
756 Nonterminal::NtItem(ref item) => {
757 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
759 Nonterminal::NtTraitItem(ref item) => {
760 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
762 Nonterminal::NtImplItem(ref item) => {
763 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
// Ident/lifetime/token-tree fragments convert directly — always lossless.
765 Nonterminal::NtIdent(ident, is_raw) => {
766 Some(TokenTree::token(Ident(ident.name, is_raw), ident.span).into())
768 Nonterminal::NtLifetime(ident) => {
769 Some(TokenTree::token(Lifetime(ident.name), ident.span).into())
771 Nonterminal::NtTT(ref tt) => {
772 Some(tt.clone().into())
777 // FIXME(#43081): Avoid this pretty-print + reparse hack
778 let source = pprust::nonterminal_to_string(self);
779 let filename = FileName::macro_expansion_source_code(&source);
780 let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
782 // During early phases of the compiler the AST could get modified
783 // directly (e.g., attributes added or removed) and the internal cache
784 // of tokens my not be invalidated or updated. Consequently if the
785 // "lossless" token stream disagrees with our actual stringification
786 // (which has historically been much more battle-tested) then we go
787 // with the lossy stream anyway (losing span information).
789 // Note that the comparison isn't `==` here to avoid comparing spans,
790 // but it *also* is a "probable" equality which is a pretty weird
791 // definition. We mostly want to catch actual changes to the AST
792 // like a `#[cfg]` being processed or some weird `macro_rules!`
795 // What we *don't* want to catch is the fact that a user-defined
796 // literal like `0xf` is stringified as `15`, causing the cached token
797 // stream to not be literal `==` token-wise (ignoring spans) to the
798 // token stream we got from stringification.
800 // Instead the "probably equal" check here is "does each token
801 // recursively have the same discriminant?" We basically don't look at
802 // the token values here and assume that such fine grained token stream
803 // modifications, including adding/removing typically non-semantic
804 // tokens such as extra braces and commas, don't happen.
805 if let Some(tokens) = tokens {
806 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
809 info!("cached tokens found, but they're not \"probably equal\", \
810 going with stringified version");
812 return tokens_for_real
// Rebuilds a token stream equal to `#[attr...]* tokens` by synthesizing the
// attribute tokens in front of the cached item tokens. Returns `None` when no
// cached tokens exist (caller then falls back to stringification).
816 fn prepend_attrs(sess: &ParseSess,
817 attrs: &[ast::Attribute],
818 tokens: Option<&tokenstream::TokenStream>,
819 span: syntax_pos::Span)
820 -> Option<tokenstream::TokenStream>
822 let tokens = tokens?;
823 if attrs.len() == 0 {
824 return Some(tokens.clone())
826 let mut builder = tokenstream::TokenStreamBuilder::new();
// Cached tokens only ever exist for items without inner attributes.
828 assert_eq!(attr.style, ast::AttrStyle::Outer,
829 "inner attributes should prevent cached tokens from existing");
831 let source = pprust::attribute_to_string(attr);
832 let macro_filename = FileName::macro_expansion_source_code(&source);
// Sugared doc comments (`/// ...`) are reparsed wholesale rather than
// synthesized token by token.
833 if attr.is_sugared_doc {
834 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
835 builder.push(stream);
839 // synthesize # [ $path $tokens ] manually here
840 let mut brackets = tokenstream::TokenStreamBuilder::new();
842 // For simple paths, push the identifier directly
843 if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
844 let ident = attr.path.segments[0].ident;
// The `r#` prefix in the rendered name marks a raw identifier.
845 let token = Ident(ident.name, ident.as_str().starts_with("r#"));
846 brackets.push(tokenstream::TokenTree::token(token, ident.span));
848 // ... and for more complicated paths, fall back to a reparse hack that
849 // should eventually be removed.
851 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
852 brackets.push(stream);
855 brackets.push(attr.tokens.clone());
857 // The span we list here for `#` and for `[ ... ]` are both wrong in
858 // that it encompasses more than each token, but it hopefully is "good
859 // enough" for now at least.
860 builder.push(tokenstream::TokenTree::token(Pound, attr.span));
861 let delim_span = DelimSpan::from_single(attr.span);
862 builder.push(tokenstream::TokenTree::Delimited(
863 delim_span, DelimToken::Bracket, brackets.build().into()));
// Finally append the item's own cached tokens after all attributes.
865 builder.push(tokens.clone());
866 Some(builder.build())