2 pub use Nonterminal::*;
7 use crate::ast::{self};
8 use crate::parse::ParseSess;
9 use crate::print::pprust;
11 use crate::symbol::kw;
12 use crate::syntax::parse::parse_stream_from_source_str;
13 use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
15 use syntax_pos::symbol::{self, Symbol};
16 use syntax_pos::{self, Span, FileName};
21 #[cfg(target_arch = "x86_64")]
22 use rustc_data_structures::static_assert_size;
23 use rustc_data_structures::sync::Lrc;
25 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
39 /// A delimiter token.
40 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
42 /// A round parenthesis (i.e., `(` or `)`).
44 /// A square bracket (i.e., `[` or `]`).
46 /// A curly brace (i.e., `{` or `}`).
48 /// An empty delimiter.
53 pub fn len(self) -> usize {
54 if self == NoDelim { 0 } else { 1 }
57 pub fn is_empty(self) -> bool {
62 #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)]
64 Bool, // AST only, must never appear in a `Token`
70 StrRaw(u16), // raw string delimited by `n` hash symbols
72 ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols
76 #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)]
80 pub suffix: Option<Symbol>,
84 crate fn article(self) -> &'static str {
86 Integer | Err => "an",
91 crate fn descr(self) -> &'static str {
93 Bool => panic!("literal token contains `Lit::Bool`"),
94 Byte => "byte literal",
95 Char => "char literal",
96 Integer => "integer literal",
97 Float => "float literal",
98 Str | StrRaw(..) => "string literal",
99 ByteStr | ByteStrRaw(..) => "byte string literal",
100 Err => "invalid literal",
104 crate fn may_have_suffix(self) -> bool {
106 Integer | Float | Err => true,
113 pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
114 Lit { kind, symbol, suffix }
118 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
119 let ident_token: Token = Ident(ident, is_raw);
121 !ident_token.is_reserved_ident() ||
122 ident_token.is_path_segment_keyword() ||
126 // FIXME: remove when `await!(..)` syntax is removed
127 // https://github.com/rust-lang/rust/issues/60610
146 ].contains(&ident.name)
149 fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
150 let ident_token: Token = Ident(ident, is_raw);
152 !ident_token.is_reserved_ident() ||
153 ident_token.is_path_segment_keyword() ||
163 ].contains(&ident.name)
166 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Debug)]
168 /* Expression-operator symbols. */
183 /* Structural symbols */
199 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
201 /// An opening delimiter (e.g., `{`).
202 OpenDelim(DelimToken),
203 /// A closing delimiter (e.g., `}`).
204 CloseDelim(DelimToken),
209 /* Name components */
210 Ident(ast::Ident, /* is_raw */ bool),
211 Lifetime(ast::Ident),
213 Interpolated(Lrc<Nonterminal>),
215 // Can be expanded into several tokens.
217 DocComment(ast::Name),
219 // Junk. These carry no data because we don't really care about the data
220 // they *would* carry, and don't really want to allocate a new ident for
221 // them. Instead, users could extract that from the associated span.
232 // `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
233 #[cfg(target_arch = "x86_64")]
234 static_assert_size!(Token, 16);
237 /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
238 pub fn from_ast_ident(ident: ast::Ident) -> Token {
239 Ident(ident, ident.is_raw_guess())
242 crate fn is_like_plus(&self) -> bool {
244 BinOp(Plus) | BinOpEq(Plus) => true,
249 /// Returns `true` if the token can appear at the start of an expression.
250 crate fn can_begin_expr(&self) -> bool {
252 Ident(ident, is_raw) =>
253 ident_can_begin_expr(ident, is_raw), // value name or keyword
254 OpenDelim(..) | // tuple, array or block
255 Literal(..) | // literal
256 Not | // operator not
257 BinOp(Minus) | // unary minus
258 BinOp(Star) | // dereference
259 BinOp(Or) | OrOr | // closure
260 BinOp(And) | // reference
261 AndAnd | // double reference
262 // DotDotDot is no longer supported, but we need some way to display the error
263 DotDot | DotDotDot | DotDotEq | // range notation
264 Lt | BinOp(Shl) | // associated path
265 ModSep | // global path
266 Lifetime(..) | // labeled loop
267 Pound => true, // expression attributes
268 Interpolated(ref nt) => match **nt {
274 NtLifetime(..) => true,
281 /// Returns `true` if the token can appear at the start of a type.
282 crate fn can_begin_type(&self) -> bool {
284 Ident(ident, is_raw) =>
285 ident_can_begin_type(ident, is_raw), // type name or keyword
286 OpenDelim(Paren) | // tuple
287 OpenDelim(Bracket) | // array
289 BinOp(Star) | // raw pointer
290 BinOp(And) | // reference
291 AndAnd | // double reference
292 Question | // maybe bound in trait object
293 Lifetime(..) | // lifetime bound in trait object
294 Lt | BinOp(Shl) | // associated path
295 ModSep => true, // global path
296 Interpolated(ref nt) => match **nt {
297 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
304 /// Returns `true` if the token can appear at the start of a const param.
305 pub fn can_begin_const_arg(&self) -> bool {
307 OpenDelim(Brace) => true,
308 Interpolated(ref nt) => match **nt {
311 NtLiteral(..) => true,
314 _ => self.can_begin_literal_or_bool(),
318 /// Returns `true` if the token can appear at the start of a generic bound.
319 crate fn can_begin_bound(&self) -> bool {
320 self.is_path_start() || self.is_lifetime() || self.is_keyword(kw::For) ||
321 self == &Question || self == &OpenDelim(Paren)
324 pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Token {
325 Literal(Lit::new(kind, symbol, suffix))
328 /// Returns `true` if the token is any literal
329 crate fn is_lit(&self) -> bool {
336 crate fn expect_lit(&self) -> Lit {
339 _=> panic!("`expect_lit` called on non-literal"),
343 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
344 /// for example a '-42'), or one of the boolean idents.
345 crate fn can_begin_literal_or_bool(&self) -> bool {
348 BinOp(Minus) => true,
349 Ident(ident, false) if ident.name == kw::True => true,
350 Ident(ident, false) if ident.name == kw::False => true,
351 Interpolated(ref nt) => match **nt {
352 NtLiteral(..) => true,
359 /// Returns an identifier if this token is an identifier.
360 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
362 Ident(ident, is_raw) => Some((ident, is_raw)),
363 Interpolated(ref nt) => match **nt {
364 NtIdent(ident, is_raw) => Some((ident, is_raw)),
370 /// Returns a lifetime identifier if this token is a lifetime.
371 pub fn lifetime(&self) -> Option<ast::Ident> {
373 Lifetime(ident) => Some(ident),
374 Interpolated(ref nt) => match **nt {
375 NtLifetime(ident) => Some(ident),
381 /// Returns `true` if the token is an identifier.
382 pub fn is_ident(&self) -> bool {
383 self.ident().is_some()
385 /// Returns `true` if the token is a lifetime.
386 crate fn is_lifetime(&self) -> bool {
387 self.lifetime().is_some()
390 /// Returns `true` if the token is an identifier whose name is the given
392 crate fn is_ident_named(&self, name: &str) -> bool {
394 Some((ident, _)) => ident.as_str() == name,
399 /// Returns `true` if the token is an interpolated path.
400 fn is_path(&self) -> bool {
401 if let Interpolated(ref nt) = *self {
402 if let NtPath(..) = **nt {
409 /// Returns `true` if the token is either the `mut` or `const` keyword.
410 crate fn is_mutability(&self) -> bool {
411 self.is_keyword(kw::Mut) ||
412 self.is_keyword(kw::Const)
415 crate fn is_qpath_start(&self) -> bool {
416 self == &Lt || self == &BinOp(Shl)
419 crate fn is_path_start(&self) -> bool {
420 self == &ModSep || self.is_qpath_start() || self.is_path() ||
421 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
424 /// Returns `true` if the token is a given keyword, `kw`.
425 pub fn is_keyword(&self, kw: Symbol) -> bool {
426 self.ident().map(|(ident, is_raw)| ident.name == kw && !is_raw).unwrap_or(false)
429 pub fn is_path_segment_keyword(&self) -> bool {
431 Some((id, false)) => id.is_path_segment_keyword(),
436 // Returns true for reserved identifiers used internally for elided lifetimes,
437 // unnamed method parameters, crate root module, error recovery etc.
438 pub fn is_special_ident(&self) -> bool {
440 Some((id, false)) => id.is_special(),
445 /// Returns `true` if the token is a keyword used in the language.
446 crate fn is_used_keyword(&self) -> bool {
448 Some((id, false)) => id.is_used_keyword(),
453 /// Returns `true` if the token is a keyword reserved for possible future use.
454 crate fn is_unused_keyword(&self) -> bool {
456 Some((id, false)) => id.is_unused_keyword(),
461 /// Returns `true` if the token is either a special identifier or a keyword.
462 pub fn is_reserved_ident(&self) -> bool {
464 Some((id, false)) => id.is_reserved(),
469 crate fn glue(self, joint: Token) -> Option<Token> {
480 BinOp(Minus) => LArrow,
493 BinOp(op) => match joint {
495 BinOp(And) if op == And => AndAnd,
496 BinOp(Or) if op == Or => OrOr,
497 Gt if op == Minus => RArrow,
505 DotDot => match joint {
510 Colon => match joint {
514 SingleQuote => match joint {
515 Ident(ident, false) => {
516 let name = Symbol::intern(&format!("'{}", ident));
517 Lifetime(symbol::Ident {
525 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot |
526 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
527 Question | OpenDelim(..) | CloseDelim(..) |
528 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
529 Whitespace | Comment | Shebang(..) | Eof => return None,
533 /// Returns tokens that are likely to be typed accidentally instead of the current token.
534 /// Enables better error recovery when the wrong token is found.
535 crate fn similar_tokens(&self) -> Option<Vec<Token>> {
537 Comma => Some(vec![Dot, Lt, Semi]),
538 Semi => Some(vec![Colon, Comma]),
543 // See comments in `Nonterminal::to_tokenstream` for why we care about
544 // *probably* equal here rather than actual equality
545 crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
546 if mem::discriminant(self) != mem::discriminant(other) {
549 match (self, other) {
564 (&DotDotDot, &DotDotDot) |
565 (&DotDotEq, &DotDotEq) |
572 (&FatArrow, &FatArrow) |
575 (&Question, &Question) |
576 (&Whitespace, &Whitespace) |
577 (&Comment, &Comment) |
578 (&Eof, &Eof) => true,
580 (&BinOp(a), &BinOp(b)) |
581 (&BinOpEq(a), &BinOpEq(b)) => a == b,
583 (&OpenDelim(a), &OpenDelim(b)) |
584 (&CloseDelim(a), &CloseDelim(b)) => a == b,
586 (&DocComment(a), &DocComment(b)) |
587 (&Shebang(a), &Shebang(b)) => a == b,
589 (&Literal(a), &Literal(b)) => a == b,
591 (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
592 (&Ident(a, b), &Ident(c, d)) => b == d && (a.name == c.name ||
593 a.name == kw::DollarCrate ||
594 c.name == kw::DollarCrate),
596 (&Interpolated(_), &Interpolated(_)) => false,
598 _ => panic!("forgot to add a token?"),
603 #[derive(Clone, RustcEncodable, RustcDecodable)]
604 /// For interpolation during macro expansion.
605 pub enum Nonterminal {
606 NtItem(P<ast::Item>),
607 NtBlock(P<ast::Block>),
610 NtExpr(P<ast::Expr>),
612 NtIdent(ast::Ident, /* is_raw */ bool),
613 NtLifetime(ast::Ident),
614 NtLiteral(P<ast::Expr>),
615 /// Stuff inside brackets for attributes
616 NtMeta(ast::MetaItem),
618 NtVis(ast::Visibility),
620 // Used only for passing items to proc macro attributes (they are not
621 // strictly necessary for that, `Annotatable` can be converted into
622 // tokens directly, but doing that naively regresses pretty-printing).
623 NtTraitItem(ast::TraitItem),
624 NtImplItem(ast::ImplItem),
625 NtForeignItem(ast::ForeignItem),
628 impl PartialEq for Nonterminal {
629 fn eq(&self, rhs: &Self) -> bool {
631 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
632 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
633 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
634 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
635 // FIXME: Assume that all "complex" nonterminals are not equal, we can't compare them
636 // correctly based on data from AST. This will prevent them from matching each other
637 // in macros. The comparison will become possible only when each nonterminal has an
638 // attached token stream from which it was parsed.
644 impl fmt::Debug for Nonterminal {
645 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
647 NtItem(..) => f.pad("NtItem(..)"),
648 NtBlock(..) => f.pad("NtBlock(..)"),
649 NtStmt(..) => f.pad("NtStmt(..)"),
650 NtPat(..) => f.pad("NtPat(..)"),
651 NtExpr(..) => f.pad("NtExpr(..)"),
652 NtTy(..) => f.pad("NtTy(..)"),
653 NtIdent(..) => f.pad("NtIdent(..)"),
654 NtLiteral(..) => f.pad("NtLiteral(..)"),
655 NtMeta(..) => f.pad("NtMeta(..)"),
656 NtPath(..) => f.pad("NtPath(..)"),
657 NtTT(..) => f.pad("NtTT(..)"),
658 NtImplItem(..) => f.pad("NtImplItem(..)"),
659 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
660 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
661 NtVis(..) => f.pad("NtVis(..)"),
662 NtLifetime(..) => f.pad("NtLifetime(..)"),
668 pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
669 // A `Nonterminal` is often a parsed AST item. At this point we now
670 // need to convert the parsed AST to an actual token stream, e.g.
671 // un-parse it basically.
673 // Unfortunately there's not really a great way to do that in a
674 // guaranteed lossless fashion right now. The fallback here is to just
675 // stringify the AST node and reparse it, but this loses all span
678 // As a result, some AST nodes are annotated with the token stream they
679 // came from. Here we attempt to extract these lossless token streams
680 // before we fall back to the stringification.
681 let tokens = match *self {
682 Nonterminal::NtItem(ref item) => {
683 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
685 Nonterminal::NtTraitItem(ref item) => {
686 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
688 Nonterminal::NtImplItem(ref item) => {
689 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
691 Nonterminal::NtIdent(ident, is_raw) => {
692 let token = Token::Ident(ident, is_raw);
693 Some(TokenTree::Token(ident.span, token).into())
695 Nonterminal::NtLifetime(ident) => {
696 let token = Token::Lifetime(ident);
697 Some(TokenTree::Token(ident.span, token).into())
699 Nonterminal::NtTT(ref tt) => {
700 Some(tt.clone().into())
705 // FIXME(#43081): Avoid this pretty-print + reparse hack
706 let source = pprust::nonterminal_to_string(self);
707 let filename = FileName::macro_expansion_source_code(&source);
708 let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
710 // During early phases of the compiler the AST could get modified
711 // directly (e.g., attributes added or removed) and the internal cache
712 // of tokens may not be invalidated or updated. Consequently if the
713 // "lossless" token stream disagrees with our actual stringification
714 // (which has historically been much more battle-tested) then we go
715 // with the lossy stream anyway (losing span information).
717 // Note that the comparison isn't `==` here to avoid comparing spans,
718 // but it *also* is a "probable" equality which is a pretty weird
719 // definition. We mostly want to catch actual changes to the AST
720 // like a `#[cfg]` being processed or some weird `macro_rules!`
723 // What we *don't* want to catch is the fact that a user-defined
724 // literal like `0xf` is stringified as `15`, causing the cached token
725 // stream to not be literal `==` token-wise (ignoring spans) to the
726 // token stream we got from stringification.
728 // Instead the "probably equal" check here is "does each token
729 // recursively have the same discriminant?" We basically don't look at
730 // the token values here and assume that such fine grained token stream
731 // modifications, including adding/removing typically non-semantic
732 // tokens such as extra braces and commas, don't happen.
733 if let Some(tokens) = tokens {
734 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
737 info!("cached tokens found, but they're not \"probably equal\", \
738 going with stringified version");
740 return tokens_for_real
744 crate fn is_op(tok: &Token) -> bool {
746 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
747 Ident(..) | Lifetime(..) | Interpolated(..) |
748 Whitespace | Comment | Shebang(..) | Eof => false,
753 fn prepend_attrs(sess: &ParseSess,
754 attrs: &[ast::Attribute],
755 tokens: Option<&tokenstream::TokenStream>,
756 span: syntax_pos::Span)
757 -> Option<tokenstream::TokenStream>
759 let tokens = tokens?;
760 if attrs.len() == 0 {
761 return Some(tokens.clone())
763 let mut builder = tokenstream::TokenStreamBuilder::new();
765 assert_eq!(attr.style, ast::AttrStyle::Outer,
766 "inner attributes should prevent cached tokens from existing");
768 let source = pprust::attr_to_string(attr);
769 let macro_filename = FileName::macro_expansion_source_code(&source);
770 if attr.is_sugared_doc {
771 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
772 builder.push(stream);
776 // synthesize # [ $path $tokens ] manually here
777 let mut brackets = tokenstream::TokenStreamBuilder::new();
779 // For simple paths, push the identifier directly
780 if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
781 let ident = attr.path.segments[0].ident;
782 let token = Ident(ident, ident.as_str().starts_with("r#"));
783 brackets.push(tokenstream::TokenTree::Token(ident.span, token));
785 // ... and for more complicated paths, fall back to a reparse hack that
786 // should eventually be removed.
788 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
789 brackets.push(stream);
792 brackets.push(attr.tokens.clone());
794 // The spans we list here for `#` and for `[ ... ]` are both wrong in
795 // that they encompass more than each token, but they are hopefully "good
796 // enough" for now at least.
797 builder.push(tokenstream::TokenTree::Token(attr.span, Pound));
798 let delim_span = DelimSpan::from_single(attr.span);
799 builder.push(tokenstream::TokenTree::Delimited(
800 delim_span, DelimToken::Bracket, brackets.build().into()));
802 builder.push(tokens.clone());
803 Some(builder.build())