2 pub use Nonterminal::*;
7 use crate::ast::{self};
8 use crate::parse::ParseSess;
9 use crate::print::pprust;
11 use crate::symbol::keywords;
12 use crate::syntax::parse::parse_stream_from_source_str;
13 use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
15 use syntax_pos::symbol::{self, Symbol};
16 use syntax_pos::{self, Span, FileName};
21 #[cfg(target_arch = "x86_64")]
22 use rustc_data_structures::static_assert;
23 use rustc_data_structures::sync::Lrc;
// Derive for an operator-token enum whose header and variants (e.g. `Plus`,
// `Minus`, `Star`, `And`, `Or`, `Shl`, used below as `BinOp(..)` payloads)
// are elided in this extract.
25 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
39 /// A delimiter token.
40 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
// NOTE(review): the `enum DelimToken` header and variant identifiers are
// elided; the doc comments below correspond to the variants referenced later
// in this file as `Paren`, `Bracket`, `Brace`, and `NoDelim` — TODO confirm.
42 /// A round parenthesis (i.e., `(` or `)`).
44 /// A square bracket (i.e., `[` or `]`).
46 /// A curly brace (i.e., `{` or `}`).
48 /// An empty delimiter.
// Length in source characters of one side of this delimiter: `NoDelim`
// occupies no text, every real delimiter is a single character.
53 pub fn len(self) -> usize {
54 if self == NoDelim { 0 } else { 1 }
// Body elided in this extract; presumably `self.len() == 0`, i.e. true only
// for `NoDelim` — TODO confirm against the full source.
57 pub fn is_empty(self) -> bool {
// Literal-kind enum (`enum Lit` header and most variants elided here; the
// visible `literal_name` below shows Byte/Char/Err/Integer/Float/Str_/ByteStr
// variants). Raw-string forms record the number of `#` delimiters so the
// literal can be reproduced verbatim.
62 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
70 StrRaw(ast::Name, u16), /* raw str delimited by n hash symbols */
72 ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */
// Human-readable name of this literal kind, for use in diagnostics.
// (The `match self {` opener and closing braces are elided in this extract.)
76 crate fn literal_name(&self) -> &'static str {
78 Byte(_) => "byte literal",
79 Char(_) => "char literal",
80 Err(_) => "invalid literal",
81 Integer(_) => "integer literal",
82 Float(_) => "float literal",
83 Str_(_) | StrRaw(..) => "string literal",
84 ByteStr(_) | ByteStrRaw(..) => "byte string literal"
88 // See comments in `Nonterminal::to_tokenstream` for why we care about
89 // *probably* equal here rather than actual equality
// Two literals are "probably equal" when they are the same enum variant;
// the payload (interned symbol / value) is deliberately ignored.
90 fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
91 mem::discriminant(self) == mem::discriminant(other)
// Returns `true` if `ident` can start an expression: any non-reserved
// identifier, a path-segment keyword, or one of the whitelisted keywords
// below that syntactically begin expressions (`async`, `box`, `break`,
// `loop`, `match`, the literals `true`/`false`, etc.).
95 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
96 let ident_token: Token = Ident(ident, is_raw);
// Raw identifiers (`r#loop`) are never reserved, so they always qualify.
98 !ident_token.is_reserved_ident() ||
99 ident_token.is_path_segment_keyword() ||
// NOTE(review): the array opener `[` and a few list entries (original lines
// 100, 102, 106-107, 113) are elided in this extract.
101 keywords::Async.name(),
103 // FIXME: remove when `await!(..)` syntax is removed
104 // https://github.com/rust-lang/rust/issues/60610
105 keywords::Await.name(),
108 keywords::Box.name(),
109 keywords::Break.name(),
110 keywords::Continue.name(),
111 keywords::False.name(),
112 keywords::For.name(),
114 keywords::Loop.name(),
115 keywords::Match.name(),
116 keywords::Move.name(),
117 keywords::Return.name(),
118 keywords::True.name(),
119 keywords::Unsafe.name(),
120 keywords::While.name(),
121 keywords::Yield.name(),
122 keywords::Static.name(),
123 ].contains(&ident.name)
// Returns `true` if `ident` can start a type: any non-reserved identifier,
// a path-segment keyword, or a keyword that begins a type form (`_`,
// `for<'a> ..`, `impl Trait`, `dyn Trait`, `unsafe fn`, `extern fn`, etc.).
126 fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
127 let ident_token: Token = Ident(ident, is_raw);
129 !ident_token.is_reserved_ident() ||
130 ident_token.is_path_segment_keyword() ||
// NOTE(review): the array opener `[` and one entry (original lines 131, 135)
// are elided in this extract.
132 keywords::Underscore.name(),
133 keywords::For.name(),
134 keywords::Impl.name(),
136 keywords::Unsafe.name(),
137 keywords::Extern.name(),
138 keywords::Typeof.name(),
139 keywords::Dyn.name(),
140 ].contains(&ident.name)
// The lexer/parser token enum (`pub enum Token` header and many variants are
// elided in this extract; visible here are the delimiter, literal, name,
// interpolation, doc-comment, and trivia variants).
143 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Debug)]
145 /* Expression-operator symbols. */
160 /* Structural symbols */
176 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
178 /// An opening delimiter (e.g., `{`).
179 OpenDelim(DelimToken),
180 /// A closing delimiter (e.g., `}`).
181 CloseDelim(DelimToken),
// Literal body plus an optional second component — presumably the literal
// suffix (e.g. the `usize` of `1usize`) — TODO confirm.
184 Literal(Lit, Option<ast::Name>),
186 /* Name components */
187 Ident(ast::Ident, /* is_raw */ bool),
188 Lifetime(ast::Ident),
// A parsed AST fragment spliced in during macro expansion; `Lrc` keeps
// cloning the token cheap (refcount bump, no deep copy).
190 Interpolated(Lrc<Nonterminal>),
192 // Can be expanded into several tokens.
194 DocComment(ast::Name),
196 // Junk. These carry no data because we don't really care about the data
197 // they *would* carry, and don't really want to allocate a new ident for
198 // them. Instead, users could extract that from the associated span.
209 // `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
// Size is pointer-width dependent, so the compile-time check is limited to
// the 64-bit target where the 16-byte layout is known.
210 #[cfg(target_arch = "x86_64")]
211 static_assert!(MEM_SIZE_OF_STATEMENT: mem::size_of::<Token>() == 16);
214 /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
215 pub fn from_ast_ident(ident: ast::Ident) -> Token {
// `is_raw_guess` decides whether the ident must be spelled `r#...`;
// its exact heuristic is defined elsewhere — see `ast::Ident`.
216 Ident(ident, ident.is_raw_guess())
// `true` for any token that starts with `+`: both the binary operator `+`
// and the compound assignment `+=`.
219 crate fn is_like_plus(&self) -> bool {
221 BinOp(Plus) | BinOpEq(Plus) => true,
226 /// Returns `true` if the token can appear at the start of an expression.
227 crate fn can_begin_expr(&self) -> bool {
229 Ident(ident, is_raw) =>
230 ident_can_begin_expr(ident, is_raw), // value name or keyword
231 OpenDelim(..) | // tuple, array or block
232 Literal(..) | // literal
233 Not | // operator not
234 BinOp(Minus) | // unary minus
235 BinOp(Star) | // dereference
236 BinOp(Or) | OrOr | // closure
237 BinOp(And) | // reference
238 AndAnd | // double reference
239 // DotDotDot is no longer supported, but we need some way to display the error
240 DotDot | DotDotDot | DotDotEq | // range notation
241 Lt | BinOp(Shl) | // associated path
242 ModSep | // global path
243 Lifetime(..) | // labeled loop
244 Pound => true, // expression attributes
// Interpolated fragments: most nonterminal arms (original lines 246-250)
// are elided here; only the lifetime arm is visible.
245 Interpolated(ref nt) => match **nt {
251 NtLifetime(..) => true,
258 /// Returns `true` if the token can appear at the start of a type.
259 crate fn can_begin_type(&self) -> bool {
261 Ident(ident, is_raw) =>
262 ident_can_begin_type(ident, is_raw), // type name or keyword
263 OpenDelim(Paren) | // tuple
264 OpenDelim(Bracket) | // array
266 BinOp(Star) | // raw pointer
267 BinOp(And) | // reference
268 AndAnd | // double reference
269 Question | // maybe bound in trait object
270 Lifetime(..) | // lifetime bound in trait object
271 Lt | BinOp(Shl) | // associated path
272 ModSep => true, // global path
// Interpolated type-like fragments also qualify.
273 Interpolated(ref nt) => match **nt {
274 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
281 /// Returns `true` if the token can appear at the start of a const param.
282 pub fn can_begin_const_arg(&self) -> bool {
// A block expression `{ ... }`, selected interpolated fragments (some arms
// at original lines 286-287 are elided), or anything that can begin a
// literal or boolean.
284 OpenDelim(Brace) => true,
285 Interpolated(ref nt) => match **nt {
288 NtLiteral(..) => true,
291 _ => self.can_begin_literal_or_bool(),
295 /// Returns `true` if the token can appear at the start of a generic bound.
// Path starts, lifetimes, `for<'a>`, `?Trait`, and parenthesized bounds.
296 crate fn can_begin_bound(&self) -> bool {
297 self.is_path_start() || self.is_lifetime() || self.is_keyword(keywords::For) ||
298 self == &Question || self == &OpenDelim(Paren)
301 /// Returns `true` if the token is any literal
// Body elided in this extract; presumably matches the `Literal(..)` variant
// — TODO confirm against the full source.
302 crate fn is_lit(&self) -> bool {
309 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
310 /// for example a '-42', or one of the boolean idents).
311 crate fn can_begin_literal_or_bool(&self) -> bool {
// Boolean idents count only in their non-raw form: `r#true` is a plain
// identifier, not the literal `true`.
314 BinOp(Minus) => true,
315 Ident(ident, false) if ident.name == keywords::True.name() => true,
316 Ident(ident, false) if ident.name == keywords::False.name() => true,
317 Interpolated(ref nt) => match **nt {
318 NtLiteral(..) => true,
325 /// Returns an identifier if this token is an identifier.
// Looks through interpolation: an `NtIdent` fragment is treated the same as
// a plain `Ident` token.
326 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
328 Ident(ident, is_raw) => Some((ident, is_raw)),
329 Interpolated(ref nt) => match **nt {
330 NtIdent(ident, is_raw) => Some((ident, is_raw)),
336 /// Returns a lifetime identifier if this token is a lifetime.
// Same pattern as `ident()`: looks through `NtLifetime` interpolation.
337 pub fn lifetime(&self) -> Option<ast::Ident> {
339 Lifetime(ident) => Some(ident),
340 Interpolated(ref nt) => match **nt {
341 NtLifetime(ident) => Some(ident),
347 /// Returns `true` if the token is an identifier.
348 pub fn is_ident(&self) -> bool {
349 self.ident().is_some()
351 /// Returns `true` if the token is a lifetime.
352 crate fn is_lifetime(&self) -> bool {
353 self.lifetime().is_some()
356 /// Returns `true` if the token is a identifier whose name is the given
// string (raw or not; compares the interned name textually).
358 crate fn is_ident_named(&self, name: &str) -> bool {
360 Some((ident, _)) => ident.as_str() == name,
365 /// Returns `true` if the token is an interpolated path.
// Only an `Interpolated(NtPath(..))` token qualifies; a plain path is a
// sequence of tokens, not a single one.
366 fn is_path(&self) -> bool {
367 if let Interpolated(ref nt) = *self {
368 if let NtPath(..) = **nt {
375 /// Returns `true` if the token is either the `mut` or `const` keyword.
376 crate fn is_mutability(&self) -> bool {
377 self.is_keyword(keywords::Mut) ||
378 self.is_keyword(keywords::Const)
// `<` or `<<` starts a qualified path (`<T as Trait>::..`); `<<` occurs
// when the lexer glued two `<` tokens.
381 crate fn is_qpath_start(&self) -> bool {
382 self == &Lt || self == &BinOp(Shl)
// Anything that can begin a path: `::`, a qualified-path opener, an
// interpolated path, a path-segment keyword, or a non-reserved identifier.
385 crate fn is_path_start(&self) -> bool {
386 self == &ModSep || self.is_qpath_start() || self.is_path() ||
387 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
390 /// Returns `true` if the token is a given keyword, `kw`.
// Raw identifiers (`r#match`) are never keywords, hence the `!is_raw` check.
391 pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
392 self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false)
// The four predicates below share a shape: delegate to the corresponding
// `ast::Ident` classification, but only for non-raw identifiers (the
// fall-through `None`/raw arms are elided in this extract).
395 pub fn is_path_segment_keyword(&self) -> bool {
397 Some((id, false)) => id.is_path_segment_keyword(),
402 // Returns true for reserved identifiers used internally for elided lifetimes,
403 // unnamed method parameters, crate root module, error recovery etc.
404 pub fn is_special_ident(&self) -> bool {
406 Some((id, false)) => id.is_special(),
411 /// Returns `true` if the token is a keyword used in the language.
412 crate fn is_used_keyword(&self) -> bool {
414 Some((id, false)) => id.is_used_keyword(),
419 /// Returns `true` if the token is a keyword reserved for possible future use.
420 crate fn is_unused_keyword(&self) -> bool {
422 Some((id, false)) => id.is_unused_keyword(),
427 /// Returns `true` if the token is either a special identifier or a keyword.
428 pub fn is_reserved_ident(&self) -> bool {
430 Some((id, false)) => id.is_reserved(),
// Tries to merge `self` with the immediately following token `joint` into a
// single multi-character token (e.g. `-` after `<` gives `<-`, `&&` gives
// `AndAnd`, `'` followed by an ident gives a lifetime). Returns `None` when
// the pair does not glue. Many outer match arms are elided in this extract.
435 crate fn glue(self, joint: Token) -> Option<Token> {
// Presumably inside the `Lt => match joint` arm: `<` + `-` = `<-` — TODO
// confirm against the full source.
446 BinOp(Minus) => LArrow,
459 BinOp(op) => match joint {
461 BinOp(And) if op == And => AndAnd,
462 BinOp(Or) if op == Or => OrOr,
463 Gt if op == Minus => RArrow,
471 DotDot => match joint {
476 Colon => match joint {
// `'` + a non-raw ident re-interns as a lifetime symbol `'ident`.
480 SingleQuote => match joint {
481 Ident(ident, false) => {
482 let name = Symbol::intern(&format!("'{}", ident));
483 Lifetime(symbol::Ident {
// All remaining token kinds never begin a glued pair.
491 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot |
492 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
493 Question | OpenDelim(..) | CloseDelim(..) |
494 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
495 Whitespace | Comment | Shebang(..) | Eof => return None,
499 /// Returns tokens that are likely to be typed accidentally instead of the current token.
500 /// Enables better error recovery when the wrong token is found.
501 crate fn similar_tokens(&self) -> Option<Vec<Token>> {
503 Comma => Some(vec![Dot, Lt, Semi]),
504 Semi => Some(vec![Colon, Comma]),
509 // See comments in `Nonterminal::to_tokenstream` for why we care about
510 // *probably* equal here rather than actual equality
// Variant-wise comparison that deliberately ignores spans and some payload
// detail; used to decide whether a cached token stream still matches the AST.
511 crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
// Different variants are never equal; short-circuit before the payload match.
512 if mem::discriminant(self) != mem::discriminant(other) {
515 match (self, other) {
// Data-less tokens: equal by variant alone (several arms elided here).
530 (&DotDotDot, &DotDotDot) |
531 (&DotDotEq, &DotDotEq) |
538 (&FatArrow, &FatArrow) |
541 (&Question, &Question) |
542 (&Whitespace, &Whitespace) |
543 (&Comment, &Comment) |
544 (&Eof, &Eof) => true,
546 (&BinOp(a), &BinOp(b)) |
547 (&BinOpEq(a), &BinOpEq(b)) => a == b,
549 (&OpenDelim(a), &OpenDelim(b)) |
550 (&CloseDelim(a), &CloseDelim(b)) => a == b,
552 (&DocComment(a), &DocComment(b)) |
553 (&Shebang(a), &Shebang(b)) => a == b,
// Names compared without spans; `$crate` is treated as a wildcard because
// it expands differently depending on the defining crate.
555 (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
556 (&Ident(a, b), &Ident(c, d)) => b == d && (a.name == c.name ||
557 a.name == keywords::DollarCrate.name() ||
558 c.name == keywords::DollarCrate.name()),
560 (&Literal(ref a, b), &Literal(ref c, d)) => {
561 b == d && a.probably_equal_for_proc_macro(c)
// Interpolated fragments are conservatively never "probably equal".
564 (&Interpolated(_), &Interpolated(_)) => false,
// New token variants must be added above; this panic catches omissions.
566 _ => panic!("forgot to add a token?"),
571 #[derive(Clone, RustcEncodable, RustcDecodable)]
572 /// For interpolation during macro expansion.
// Each variant wraps a parsed AST fragment matching a macro fragment
// specifier (`item`, `block`, `expr`, ...). Some variants (e.g. `NtStmt`,
// `NtPat`, `NtTy`, `NtPath`, `NtTT`) are elided in this extract but are
// referenced by the impls below.
573 pub enum Nonterminal {
574 NtItem(P<ast::Item>),
575 NtBlock(P<ast::Block>),
578 NtExpr(P<ast::Expr>),
580 NtIdent(ast::Ident, /* is_raw */ bool),
581 NtLifetime(ast::Ident),
582 NtLiteral(P<ast::Expr>),
583 /// Stuff inside brackets for attributes
584 NtMeta(ast::MetaItem),
586 NtVis(ast::Visibility),
588 // These are not exposed to macros, but are used by quasiquote.
590 NtImplItem(ast::ImplItem),
591 NtTraitItem(ast::TraitItem),
592 NtForeignItem(ast::ForeignItem),
593 NtGenerics(ast::Generics),
594 NtWhereClause(ast::WhereClause),
// Equality is only defined for the "simple" variants whose payload is
// directly comparable; everything else falls through to a catch-all
// (elided in this extract) — see the FIXME below.
598 impl PartialEq for Nonterminal {
599 fn eq(&self, rhs: &Self) -> bool {
601 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
602 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
603 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
604 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
605 // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
606 // correctly based on data from AST. This will prevent them from matching each other
607 // in macros. The comparison will become possible only when each nonterminal has an
608 // attached token stream from which it was parsed.
// Hand-written `Debug`: prints only the variant name, never the (large,
// non-`Debug`) AST payload.
614 impl fmt::Debug for Nonterminal {
615 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
617 NtItem(..) => f.pad("NtItem(..)"),
618 NtBlock(..) => f.pad("NtBlock(..)"),
619 NtStmt(..) => f.pad("NtStmt(..)"),
620 NtPat(..) => f.pad("NtPat(..)"),
621 NtExpr(..) => f.pad("NtExpr(..)"),
622 NtTy(..) => f.pad("NtTy(..)"),
623 NtIdent(..) => f.pad("NtIdent(..)"),
624 NtLiteral(..) => f.pad("NtLiteral(..)"),
625 NtMeta(..) => f.pad("NtMeta(..)"),
626 NtPath(..) => f.pad("NtPath(..)"),
627 NtTT(..) => f.pad("NtTT(..)"),
628 NtArm(..) => f.pad("NtArm(..)"),
629 NtImplItem(..) => f.pad("NtImplItem(..)"),
630 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
631 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
632 NtGenerics(..) => f.pad("NtGenerics(..)"),
633 NtWhereClause(..) => f.pad("NtWhereClause(..)"),
634 NtArg(..) => f.pad("NtArg(..)"),
635 NtVis(..) => f.pad("NtVis(..)"),
636 NtLifetime(..) => f.pad("NtLifetime(..)"),
// Converts this parsed AST fragment back into a token stream. Prefers the
// lossless cached tokens attached to the AST node; falls back to
// pretty-print + reparse (losing spans) when no cache exists or when the
// cache no longer "probably equals" the stringified form.
642 pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
643 // A `Nonterminal` is often a parsed AST item. At this point we now
644 // need to convert the parsed AST to an actual token stream, e.g.
645 // un-parse it basically.
647 // Unfortunately there's not really a great way to do that in a
648 // guaranteed lossless fashion right now. The fallback here is to just
649 // stringify the AST node and reparse it, but this loses all span
652 // As a result, some AST nodes are annotated with the token stream they
653 // came from. Here we attempt to extract these lossless token streams
654 // before we fall back to the stringification.
655 let tokens = match *self {
656 Nonterminal::NtItem(ref item) => {
657 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
659 Nonterminal::NtTraitItem(ref item) => {
660 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
662 Nonterminal::NtImplItem(ref item) => {
663 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
// Idents and lifetimes are trivially a single token — always lossless.
665 Nonterminal::NtIdent(ident, is_raw) => {
666 let token = Token::Ident(ident, is_raw);
667 Some(TokenTree::Token(ident.span, token).into())
669 Nonterminal::NtLifetime(ident) => {
670 let token = Token::Lifetime(ident);
671 Some(TokenTree::Token(ident.span, token).into())
673 Nonterminal::NtTT(ref tt) => {
674 Some(tt.clone().into())
// NOTE(review): the fall-through arm for the remaining variants (original
// lines 675-678) is elided in this extract; presumably it yields `None`.
679 // FIXME(#43081): Avoid this pretty-print + reparse hack
680 let source = pprust::nonterminal_to_string(self);
681 let filename = FileName::macro_expansion_source_code(&source);
682 let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
684 // During early phases of the compiler the AST could get modified
685 // directly (e.g., attributes added or removed) and the internal cache
686 // of tokens my not be invalidated or updated. Consequently if the
687 // "lossless" token stream disagrees with our actual stringification
688 // (which has historically been much more battle-tested) then we go
689 // with the lossy stream anyway (losing span information).
691 // Note that the comparison isn't `==` here to avoid comparing spans,
692 // but it *also* is a "probable" equality which is a pretty weird
693 // definition. We mostly want to catch actual changes to the AST
694 // like a `#[cfg]` being processed or some weird `macro_rules!`
697 // What we *don't* want to catch is the fact that a user-defined
698 // literal like `0xf` is stringified as `15`, causing the cached token
699 // stream to not be literal `==` token-wise (ignoring spans) to the
700 // token stream we got from stringification.
702 // Instead the "probably equal" check here is "does each token
703 // recursively have the same discriminant?" We basically don't look at
704 // the token values here and assume that such fine grained token stream
705 // modifications, including adding/removing typically non-semantic
706 // tokens such as extra braces and commas, don't happen.
707 if let Some(tokens) = tokens {
708 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
// The early `return tokens` for the equal case (original lines 709-710)
// is elided in this extract.
711 info!("cached tokens found, but they're not \"probably equal\", \
712 going with stringified version");
714 return tokens_for_real
// `true` when `tok` is an operator/punctuation token; the listed "atom" and
// trivia tokens are explicitly not operators (the `_ => true` fall-through
// and closing braces are elided in this extract — TODO confirm).
718 crate fn is_op(tok: &Token) -> bool {
720 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
721 Ident(..) | Lifetime(..) | Interpolated(..) |
722 Whitespace | Comment | Shebang(..) | Eof => false,
// Builds `tokens` with the token form of each outer attribute in `attrs`
// prepended. Returns `None` when no cached tokens exist (propagated via `?`),
// so the caller falls back to stringification.
727 fn prepend_attrs(sess: &ParseSess,
728 attrs: &[ast::Attribute],
729 tokens: Option<&tokenstream::TokenStream>,
730 span: syntax_pos::Span)
731 -> Option<tokenstream::TokenStream>
733 let tokens = tokens?;
// Fast path: nothing to prepend.
734 if attrs.len() == 0 {
735 return Some(tokens.clone())
737 let mut builder = tokenstream::TokenStreamBuilder::new();
// The per-attribute loop header (original line 738) is elided in this
// extract; the assert documents the invariant that cached tokens only
// exist for items whose attributes are all outer attributes.
739 assert_eq!(attr.style, ast::AttrStyle::Outer,
740 "inner attributes should prevent cached tokens from existing");
742 let source = pprust::attr_to_string(attr);
743 let macro_filename = FileName::macro_expansion_source_code(&source);
// Doc comments (`/// ...` sugar) are reparsed wholesale from their printed
// form rather than synthesized token-by-token.
744 if attr.is_sugared_doc {
745 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
746 builder.push(stream);
750 // synthesize # [ $path $tokens ] manually here
751 let mut brackets = tokenstream::TokenStreamBuilder::new();
753 // For simple paths, push the identifier directly
754 if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
755 let ident = attr.path.segments[0].ident;
// A leading `r#` in the printed ident marks it as raw.
756 let token = Ident(ident, ident.as_str().starts_with("r#"));
757 brackets.push(tokenstream::TokenTree::Token(ident.span, token));
759 // ... and for more complicated paths, fall back to a reparse hack that
760 // should eventually be removed.
762 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
763 brackets.push(stream);
766 brackets.push(attr.tokens.clone());
768 // The span we list here for `#` and for `[ ... ]` are both wrong in
769 // that it encompasses more than each token, but it hopefully is "good
770 // enough" for now at least.
771 builder.push(tokenstream::TokenTree::Token(attr.span, Pound));
772 let delim_span = DelimSpan::from_single(attr.span);
773 builder.push(tokenstream::TokenTree::Delimited(
774 delim_span, DelimToken::Bracket, brackets.build().into()));
// Finally append the item's own cached tokens after all attributes.
776 builder.push(tokens.clone());
777 Some(builder.build())