2 pub use Nonterminal::*;
7 use crate::ast::{self};
8 use crate::parse::ParseSess;
9 use crate::print::pprust;
11 use crate::symbol::keywords;
12 use crate::syntax::parse::parse_stream_from_source_str;
13 use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
15 use syntax_pos::symbol::{self, Symbol};
16 use syntax_pos::{self, Span, FileName};
21 #[cfg(target_arch = "x86_64")]
22 use rustc_data_structures::static_assert_size;
23 use rustc_data_structures::sync::Lrc;
25 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
39 /// A delimiter token.
40 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
42 /// A round parenthesis (i.e., `(` or `)`).
44 /// A square bracket (i.e., `[` or `]`).
46 /// A curly brace (i.e., `{` or `}`).
48 /// An empty delimiter.
// Character width of one side of the delimiter: 0 for the invisible
// `NoDelim`, 1 for every real delimiter.
53 pub fn len(self) -> usize {
54 if self == NoDelim { 0 } else { 1 }
// `true` only for `NoDelim`, the delimiter that produces no characters.
57 pub fn is_empty(self) -> bool {
62 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
64 Bool(ast::Name), // AST only, must never appear in a `Token`
71 StrRaw(ast::Name, u16), /* raw str delimited by n hash symbols */
73 ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */
// `Lit` is produced for every literal the lexer sees; keep it small.
// The size is only asserted on x86_64, where it is predictable.
76 #[cfg(target_arch = "x86_64")]
77 static_assert_size!(Lit, 8);
// Human-readable name of this literal kind, for use in diagnostics.
// Panics on `Bool`, which must never reach a `Token` (see the enum).
80 crate fn literal_name(&self) -> &'static str {
82 Bool(_) => panic!("literal token contains `Lit::Bool`"),
83 Byte(_) => "byte literal",
84 Char(_) => "char literal",
85 Err(_) => "invalid literal",
86 Integer(_) => "integer literal",
87 Float(_) => "float literal",
88 Str_(_) | StrRaw(..) => "string literal",
89 ByteStr(_) | ByteStrRaw(..) => "byte string literal"
// Only numeric literals may carry a suffix (e.g. `1u8`, `2.0f32`).
93 crate fn may_have_suffix(&self) -> bool {
95 Integer(..) | Float(..) => true,
100 // See comments in `Nonterminal::to_tokenstream` for why we care about
101 // *probably* equal here rather than actual equality
102 fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
// Compare only the variant (the literal *kind*), ignoring the interned
// contents — stringify-and-reparse may change the exact text (e.g. `0xf` vs `15`).
103 mem::discriminant(self) == mem::discriminant(other)
// Returns `true` if `ident` can legally start an expression: any
// non-reserved identifier, any path-segment keyword, or one of the
// expression-starting keywords listed below.
107 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
108 let ident_token: Token = Ident(ident, is_raw);
110 !ident_token.is_reserved_ident() ||
111 ident_token.is_path_segment_keyword() ||
113 keywords::Async.name(),
115 // FIXME: remove when `await!(..)` syntax is removed
116 // https://github.com/rust-lang/rust/issues/60610
117 keywords::Await.name(),
120 keywords::Box.name(),
121 keywords::Break.name(),
122 keywords::Continue.name(),
123 keywords::False.name(),
124 keywords::For.name(),
126 keywords::Loop.name(),
127 keywords::Match.name(),
128 keywords::Move.name(),
129 keywords::Return.name(),
130 keywords::True.name(),
131 keywords::Unsafe.name(),
132 keywords::While.name(),
133 keywords::Yield.name(),
134 keywords::Static.name(),
135 ].contains(&ident.name)
// Returns `true` if `ident` can legally start a type: any non-reserved
// identifier, any path-segment keyword, or one of the type-starting
// keywords listed below (`_`, `for`, `impl`, `unsafe`, `extern`, `typeof`, `dyn`).
138 fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
139 let ident_token: Token = Ident(ident, is_raw);
141 !ident_token.is_reserved_ident() ||
142 ident_token.is_path_segment_keyword() ||
144 keywords::Underscore.name(),
145 keywords::For.name(),
146 keywords::Impl.name(),
148 keywords::Unsafe.name(),
149 keywords::Extern.name(),
150 keywords::Typeof.name(),
151 keywords::Dyn.name(),
152 ].contains(&ident.name)
155 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Debug)]
157 /* Expression-operator symbols. */
172 /* Structural symbols */
188 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
190 /// An opening delimiter (e.g., `{`).
191 OpenDelim(DelimToken),
192 /// A closing delimiter (e.g., `}`).
193 CloseDelim(DelimToken),
/// A literal, plus its optional suffix (see `Lit::may_have_suffix`).
196 Literal(Lit, Option<ast::Name>),
198 /* Name components */
199 Ident(ast::Ident, /* is_raw */ bool),
200 Lifetime(ast::Ident),
/// An already-parsed AST fragment, spliced in during macro expansion
/// (see `Nonterminal`).
202 Interpolated(Lrc<Nonterminal>),
204 // Can be expanded into several tokens.
206 DocComment(ast::Name),
208 // Junk. These carry no data because we don't really care about the data
209 // they *would* carry, and don't really want to allocate a new ident for
210 // them. Instead, users could extract that from the associated span.
221 // `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
222 #[cfg(target_arch = "x86_64")]
223 static_assert_size!(Token, 16);
226 /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
227 pub fn from_ast_ident(ident: ast::Ident) -> Token {
228 Ident(ident, ident.is_raw_guess())
/// Returns `true` if the token is a plus in either its bare (`+`) or
/// compound-assignment (`+=`) form.
231 crate fn is_like_plus(&self) -> bool {
233 BinOp(Plus) | BinOpEq(Plus) => true,
238 /// Returns `true` if the token can appear at the start of an expression.
239 crate fn can_begin_expr(&self) -> bool {
241 Ident(ident, is_raw) =>
242 ident_can_begin_expr(ident, is_raw), // value name or keyword
243 OpenDelim(..) | // tuple, array or block
244 Literal(..) | // literal
245 Not | // operator not
246 BinOp(Minus) | // unary minus
247 BinOp(Star) | // dereference
248 BinOp(Or) | OrOr | // closure
249 BinOp(And) | // reference
250 AndAnd | // double reference
251 // DotDotDot is no longer supported, but we need some way to display the error
252 DotDot | DotDotDot | DotDotEq | // range notation
253 Lt | BinOp(Shl) | // associated path
254 ModSep | // global path
255 Lifetime(..) | // labeled loop
256 Pound => true, // expression attributes
// Interpolated fragments that are themselves (or can begin) expressions.
257 Interpolated(ref nt) => match **nt {
263 NtLifetime(..) => true,
270 /// Returns `true` if the token can appear at the start of a type.
271 crate fn can_begin_type(&self) -> bool {
273 Ident(ident, is_raw) =>
274 ident_can_begin_type(ident, is_raw), // type name or keyword
275 OpenDelim(Paren) | // tuple
276 OpenDelim(Bracket) | // array
278 BinOp(Star) | // raw pointer
279 BinOp(And) | // reference
280 AndAnd | // double reference
281 Question | // maybe bound in trait object
282 Lifetime(..) | // lifetime bound in trait object
283 Lt | BinOp(Shl) | // associated path
284 ModSep => true, // global path
// Interpolated fragments that can begin a type.
285 Interpolated(ref nt) => match **nt {
286 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
293 /// Returns `true` if the token can appear at the start of a const param.
294 pub fn can_begin_const_arg(&self) -> bool {
296 OpenDelim(Brace) => true,
297 Interpolated(ref nt) => match **nt {
300 NtLiteral(..) => true,
// Anything else must look like a literal (or `true`/`false`, or `-`).
303 _ => self.can_begin_literal_or_bool(),
307 /// Returns `true` if the token can appear at the start of a generic bound.
308 crate fn can_begin_bound(&self) -> bool {
309 self.is_path_start() || self.is_lifetime() || self.is_keyword(keywords::For) ||
310 self == &Question || self == &OpenDelim(Paren)
313 /// Returns `true` if the token is any literal
314 crate fn is_lit(&self) -> bool {
321 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
322 /// for example `-42`), or one of the boolean idents (`true` / `false`).
323 crate fn can_begin_literal_or_bool(&self) -> bool {
326 BinOp(Minus) => true,
// Raw identifiers (`r#true`) are excluded by the `false` is_raw pattern.
327 Ident(ident, false) if ident.name == keywords::True.name() => true,
328 Ident(ident, false) if ident.name == keywords::False.name() => true,
329 Interpolated(ref nt) => match **nt {
330 NtLiteral(..) => true,
337 /// Returns an identifier if this token is an identifier.
/// Both a plain `Ident` token and an interpolated `NtIdent` fragment count.
338 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
340 Ident(ident, is_raw) => Some((ident, is_raw)),
341 Interpolated(ref nt) => match **nt {
342 NtIdent(ident, is_raw) => Some((ident, is_raw)),
348 /// Returns a lifetime identifier if this token is a lifetime.
/// Both a plain `Lifetime` token and an interpolated `NtLifetime` count.
349 pub fn lifetime(&self) -> Option<ast::Ident> {
351 Lifetime(ident) => Some(ident),
352 Interpolated(ref nt) => match **nt {
353 NtLifetime(ident) => Some(ident),
359 /// Returns `true` if the token is an identifier.
360 pub fn is_ident(&self) -> bool {
361 self.ident().is_some()
363 /// Returns `true` if the token is a lifetime.
364 crate fn is_lifetime(&self) -> bool {
365 self.lifetime().is_some()
368 /// Returns `true` if the token is an identifier whose name is the given
370 crate fn is_ident_named(&self, name: &str) -> bool {
372 Some((ident, _)) => ident.as_str() == name,
377 /// Returns `true` if the token is an interpolated path.
378 fn is_path(&self) -> bool {
379 if let Interpolated(ref nt) = *self {
380 if let NtPath(..) = **nt {
387 /// Returns `true` if the token is either the `mut` or `const` keyword.
388 crate fn is_mutability(&self) -> bool {
389 self.is_keyword(keywords::Mut) ||
390 self.is_keyword(keywords::Const)
/// Returns `true` if the token begins a qualified path: `<` or `<<`.
393 crate fn is_qpath_start(&self) -> bool {
394 self == &Lt || self == &BinOp(Shl)
/// Returns `true` if the token can begin a path (global, qualified,
/// interpolated, path-segment keyword, or a non-reserved identifier).
397 crate fn is_path_start(&self) -> bool {
398 self == &ModSep || self.is_qpath_start() || self.is_path() ||
399 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
402 /// Returns `true` if the token is a given keyword, `kw`.
// Note: raw identifiers (`is_raw == true`) are never treated as keywords,
// here and in every predicate below (the `Some((id, false))` patterns).
403 pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
404 self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false)
407 pub fn is_path_segment_keyword(&self) -> bool {
409 Some((id, false)) => id.is_path_segment_keyword(),
414 // Returns true for reserved identifiers used internally for elided lifetimes,
415 // unnamed method parameters, crate root module, error recovery etc.
416 pub fn is_special_ident(&self) -> bool {
418 Some((id, false)) => id.is_special(),
423 /// Returns `true` if the token is a keyword used in the language.
424 crate fn is_used_keyword(&self) -> bool {
426 Some((id, false)) => id.is_used_keyword(),
431 /// Returns `true` if the token is a keyword reserved for possible future use.
432 crate fn is_unused_keyword(&self) -> bool {
434 Some((id, false)) => id.is_unused_keyword(),
439 /// Returns `true` if the token is either a special identifier or a keyword.
440 pub fn is_reserved_ident(&self) -> bool {
442 Some((id, false)) => id.is_reserved(),
/// Attempts to fuse `self` with an immediately-following token into one
/// multi-character token (e.g. `-` + `>` => `->`, `&` + `&` => `&&`).
/// Returns `None` when the pair does not form a single token.
447 crate fn glue(self, joint: Token) -> Option<Token> {
458 BinOp(Minus) => LArrow,
471 BinOp(op) => match joint {
473 BinOp(And) if op == And => AndAnd,
474 BinOp(Or) if op == Or => OrOr,
475 Gt if op == Minus => RArrow,
483 DotDot => match joint {
488 Colon => match joint {
// `'` followed by a (non-raw) identifier glues into a lifetime token.
492 SingleQuote => match joint {
493 Ident(ident, false) => {
494 let name = Symbol::intern(&format!("'{}", ident));
495 Lifetime(symbol::Ident {
// All remaining tokens never glue with anything.
503 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot |
504 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
505 Question | OpenDelim(..) | CloseDelim(..) |
506 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
507 Whitespace | Comment | Shebang(..) | Eof => return None,
511 /// Returns tokens that are likely to be typed accidentally instead of the current token.
512 /// Enables better error recovery when the wrong token is found.
513 crate fn similar_tokens(&self) -> Option<Vec<Token>> {
515 Comma => Some(vec![Dot, Lt, Semi]),
516 Semi => Some(vec![Colon, Comma]),
521 // See comments in `Nonterminal::to_tokenstream` for why we care about
522 // *probably* equal here rather than actual equality
523 crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
// Different variants are never "probably equal"; bail out early.
524 if mem::discriminant(self) != mem::discriminant(other) {
527 match (self, other) {
542 (&DotDotDot, &DotDotDot) |
543 (&DotDotEq, &DotDotEq) |
550 (&FatArrow, &FatArrow) |
553 (&Question, &Question) |
554 (&Whitespace, &Whitespace) |
555 (&Comment, &Comment) |
556 (&Eof, &Eof) => true,
558 (&BinOp(a), &BinOp(b)) |
559 (&BinOpEq(a), &BinOpEq(b)) => a == b,
561 (&OpenDelim(a), &OpenDelim(b)) |
562 (&CloseDelim(a), &CloseDelim(b)) => a == b,
564 (&DocComment(a), &DocComment(b)) |
565 (&Shebang(a), &Shebang(b)) => a == b,
// Compare by name only, deliberately ignoring spans.
567 (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
// `$crate` may legitimately stringify to a different path, so either
// side being `$crate` counts as equal.
568 (&Ident(a, b), &Ident(c, d)) => b == d && (a.name == c.name ||
569 a.name == keywords::DollarCrate.name() ||
570 c.name == keywords::DollarCrate.name()),
572 (&Literal(ref a, b), &Literal(ref c, d)) => {
573 b == d && a.probably_equal_for_proc_macro(c)
// Interpolated fragments are conservatively treated as never equal,
// forcing the caller to fall back to the stringified stream.
576 (&Interpolated(_), &Interpolated(_)) => false,
578 _ => panic!("forgot to add a token?"),
583 #[derive(Clone, RustcEncodable, RustcDecodable)]
584 /// For interpolation during macro expansion.
585 pub enum Nonterminal {
586 NtItem(P<ast::Item>),
587 NtBlock(P<ast::Block>),
590 NtExpr(P<ast::Expr>),
592 NtIdent(ast::Ident, /* is_raw */ bool),
593 NtLifetime(ast::Ident),
594 NtLiteral(P<ast::Expr>),
595 /// Stuff inside brackets for attributes
596 NtMeta(ast::MetaItem),
598 NtVis(ast::Visibility),
600 // Used only for passing items to proc macro attributes (they are not
601 // strictly necessary for that, `Annotatable` can be converted into
602 // tokens directly, but doing that naively regresses pretty-printing).
603 NtTraitItem(ast::TraitItem),
604 NtImplItem(ast::ImplItem),
605 NtForeignItem(ast::ForeignItem),
608 impl PartialEq for Nonterminal {
// Only the "simple" variants (ident, lifetime, token tree) can be
// compared structurally; everything else falls through as unequal.
609 fn eq(&self, rhs: &Self) -> bool {
611 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
612 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
613 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
614 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
615 // FIXME: Assume that all "complex" nonterminals are not equal; we can't compare them
616 // correctly based on data from AST. This will prevent them from matching each other
617 // in macros. The comparison will become possible only when each nonterminal has an
618 // attached token stream from which it was parsed.
624 impl fmt::Debug for Nonterminal {
// Only variant names are printed; the AST payloads are omitted.
625 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
627 NtItem(..) => f.pad("NtItem(..)"),
628 NtBlock(..) => f.pad("NtBlock(..)"),
629 NtStmt(..) => f.pad("NtStmt(..)"),
630 NtPat(..) => f.pad("NtPat(..)"),
631 NtExpr(..) => f.pad("NtExpr(..)"),
632 NtTy(..) => f.pad("NtTy(..)"),
633 NtIdent(..) => f.pad("NtIdent(..)"),
634 NtLiteral(..) => f.pad("NtLiteral(..)"),
635 NtMeta(..) => f.pad("NtMeta(..)"),
636 NtPath(..) => f.pad("NtPath(..)"),
637 NtTT(..) => f.pad("NtTT(..)"),
638 NtImplItem(..) => f.pad("NtImplItem(..)"),
639 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
640 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
641 NtVis(..) => f.pad("NtVis(..)"),
642 NtLifetime(..) => f.pad("NtLifetime(..)"),
648 pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
649 // A `Nonterminal` is often a parsed AST item. At this point we now
650 // need to convert the parsed AST to an actual token stream, e.g.
651 // un-parse it basically.
653 // Unfortunately there's not really a great way to do that in a
654 // guaranteed lossless fashion right now. The fallback here is to just
655 // stringify the AST node and reparse it, but this loses all span
658 // As a result, some AST nodes are annotated with the token stream they
659 // came from. Here we attempt to extract these lossless token streams
660 // before we fall back to the stringification.
661 let tokens = match *self {
662 Nonterminal::NtItem(ref item) => {
663 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
665 Nonterminal::NtTraitItem(ref item) => {
666 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
668 Nonterminal::NtImplItem(ref item) => {
669 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
// Idents and lifetimes convert directly into a single-token stream.
671 Nonterminal::NtIdent(ident, is_raw) => {
672 let token = Token::Ident(ident, is_raw);
673 Some(TokenTree::Token(ident.span, token).into())
675 Nonterminal::NtLifetime(ident) => {
676 let token = Token::Lifetime(ident);
677 Some(TokenTree::Token(ident.span, token).into())
679 Nonterminal::NtTT(ref tt) => {
680 Some(tt.clone().into())
685 // FIXME(#43081): Avoid this pretty-print + reparse hack
686 let source = pprust::nonterminal_to_string(self);
687 let filename = FileName::macro_expansion_source_code(&source);
688 let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
690 // During early phases of the compiler the AST could get modified
691 // directly (e.g., attributes added or removed) and the internal cache
692 // of tokens may not be invalidated or updated. Consequently if the
693 // "lossless" token stream disagrees with our actual stringification
694 // (which has historically been much more battle-tested) then we go
695 // with the lossy stream anyway (losing span information).
697 // Note that the comparison isn't `==` here to avoid comparing spans,
698 // but it *also* is a "probable" equality which is a pretty weird
699 // definition. We mostly want to catch actual changes to the AST
700 // like a `#[cfg]` being processed or some weird `macro_rules!`
703 // What we *don't* want to catch is the fact that a user-defined
704 // literal like `0xf` is stringified as `15`, causing the cached token
705 // stream to not be literal `==` token-wise (ignoring spans) to the
706 // token stream we got from stringification.
708 // Instead the "probably equal" check here is "does each token
709 // recursively have the same discriminant?" We basically don't look at
710 // the token values here and assume that such fine grained token stream
711 // modifications, including adding/removing typically non-semantic
712 // tokens such as extra braces and commas, don't happen.
713 if let Some(tokens) = tokens {
714 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
717 info!("cached tokens found, but they're not \"probably equal\", \
718 going with stringified version");
720 return tokens_for_real
// Returns `true` for punctuation/operator tokens; delimiters, literals,
// names, interpolated fragments, and trivia listed below are not operators.
724 crate fn is_op(tok: &Token) -> bool {
726 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
727 Ident(..) | Lifetime(..) | Interpolated(..) |
728 Whitespace | Comment | Shebang(..) | Eof => false,
// Builds a token stream for an item's cached `tokens` with its outer
// attributes re-synthesized in front. Returns `None` when no cached
// tokens exist (caller then falls back to stringification).
733 fn prepend_attrs(sess: &ParseSess,
734 attrs: &[ast::Attribute],
735 tokens: Option<&tokenstream::TokenStream>,
736 span: syntax_pos::Span)
737 -> Option<tokenstream::TokenStream>
739 let tokens = tokens?;
740 if attrs.len() == 0 {
741 return Some(tokens.clone())
743 let mut builder = tokenstream::TokenStreamBuilder::new();
745 assert_eq!(attr.style, ast::AttrStyle::Outer,
746 "inner attributes should prevent cached tokens from existing");
748 let source = pprust::attr_to_string(attr);
749 let macro_filename = FileName::macro_expansion_source_code(&source);
// Sugared doc comments (`/// ...`) are reparsed wholesale.
750 if attr.is_sugared_doc {
751 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
752 builder.push(stream);
756 // synthesize # [ $path $tokens ] manually here
757 let mut brackets = tokenstream::TokenStreamBuilder::new();
759 // For simple paths, push the identifier directly
760 if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
761 let ident = attr.path.segments[0].ident;
// Preserve rawness by checking for the literal `r#` prefix.
762 let token = Ident(ident, ident.as_str().starts_with("r#"));
763 brackets.push(tokenstream::TokenTree::Token(ident.span, token));
765 // ... and for more complicated paths, fall back to a reparse hack that
766 // should eventually be removed.
768 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
769 brackets.push(stream);
772 brackets.push(attr.tokens.clone());
774 // The spans we list here for `#` and for `[ ... ]` are both wrong in
775 // that each encompasses more than its token, but they are hopefully "good
776 // enough" for now at least.
777 builder.push(tokenstream::TokenTree::Token(attr.span, Pound));
778 let delim_span = DelimSpan::from_single(attr.span);
779 builder.push(tokenstream::TokenTree::Delimited(
780 delim_span, DelimToken::Bracket, brackets.build().into()));
782 builder.push(tokens.clone());
783 Some(builder.build())