2 pub use Nonterminal::*;
7 use crate::ast::{self};
8 use crate::parse::ParseSess;
9 use crate::print::pprust;
11 use crate::symbol::kw;
12 use crate::syntax::parse::parse_stream_from_source_str;
13 use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
15 use syntax_pos::symbol::{self, Symbol};
16 use syntax_pos::{self, Span, FileName};
21 #[cfg(target_arch = "x86_64")]
22 use rustc_data_structures::static_assert_size;
23 use rustc_data_structures::sync::Lrc;
// NOTE(review): non-contiguous excerpt — the enum/impl bodies between these
// fragments (orig. lines 26-38, 41-52, 55-61) are not visible here.
// The derive below (orig. line 25) presumably belongs to an operator-token
// enum defined on the elided following lines — TODO confirm.
25 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
39 /// A delimiter token.
40 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
42 /// A round parenthesis (i.e., `(` or `)`).
44 /// A square bracket (i.e., `[` or `]`).
46 /// A curly brace (i.e., `{` or `}`).
48 /// An empty delimiter.
// Number of characters this delimiter occupies in the source text:
// the invisible `NoDelim` is 0 wide, every real delimiter is 1.
53 pub fn len(self) -> usize {
54 if self == NoDelim { 0 } else { 1 }
// Body elided in this excerpt; presumably `self == NoDelim` / `len() == 0` —
// TODO confirm against the full file.
57 pub fn is_empty(self) -> bool {
// NOTE(review): fragment — most `LitKind` variants (orig. lines 63-76) and
// the remaining `Lit` fields (orig. lines 78-80) are elided from this view.
62 #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)]
// `Bool` exists only so the AST can share this kind enum; the lexer never
// produces it (see the panic in `descr` below).
64 Bool, // AST only, must never appear in a `Token`
// The `u16` payload counts the `#` symbols delimiting the raw (byte) string.
70 StrRaw(u16), // raw string delimited by `n` hash symbols
72 ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols
77 #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)]
// Optional literal suffix, e.g. the `u8` in `1u8` — `None` when unsuffixed.
81 pub suffix: Option<Symbol>,
// NOTE(review): fragment — the match arms between the visible lines
// (orig. 87, 89-92, 96-99, 102-105, 107, 109-114) are elided.
85 /// An English article for the literal token kind.
// Chooses "a"/"an" for diagnostics; `Integer` and `Err` take "an".
86 crate fn article(self) -> &'static str {
88 Integer | Err => "an",
// Human-readable kind name used in error messages.
93 crate fn descr(self) -> &'static str {
// `Bool` is AST-only (see the variant's comment); reaching it here is a bug.
95 Bool => panic!("literal token contains `Lit::Bool`"),
100 Str | StrRaw(..) => "string",
101 ByteStr | ByteStrRaw(..) => "byte string",
// Whether a suffix (like `u8`) is grammatically allowed on this kind.
106 crate fn may_have_suffix(self) -> bool {
108 Integer | Float | Err => true,
// Plain constructor bundling the three components of a literal token.
115 pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
116 Lit { kind, symbol, suffix }
// Returns `true` if an identifier token could start an expression:
// any non-reserved identifier, a path-segment keyword, or one of the
// expression-starting keywords in the list elided below (orig. lines 125-147,
// e.g. presumably `if`/`loop`/`match`/... — TODO confirm against full file).
120 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
121 let ident_token: TokenKind = Ident(ident, is_raw);
123 !ident_token.is_reserved_ident() ||
124 ident_token.is_path_segment_keyword() ||
128 // FIXME: remove when `await!(..)` syntax is removed
129 // https://github.com/rust-lang/rust/issues/60610
148 ].contains(&ident.name)
// Returns `true` if an identifier token could start a type: any non-reserved
// identifier, a path-segment keyword, or one of the type-starting keywords in
// the list elided below (orig. lines 156-164 — TODO confirm against full file).
151 fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
152 let ident_token: TokenKind = Ident(ident, is_raw);
154 !ident_token.is_reserved_ident() ||
155 ident_token.is_path_segment_keyword() ||
165 ].contains(&ident.name)
// NOTE(review): fragment — most operator/symbol variants of this enum
// (orig. lines 171-200, 207-210, 224-232) are elided from this view.
168 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Debug)]
170 /* Expression-operator symbols. */
185 /* Structural symbols */
201 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
203 /// An opening delimiter (e.g., `{`).
204 OpenDelim(DelimToken),
205 /// A closing delimiter (e.g., `}`).
206 CloseDelim(DelimToken),
211 /* Name components */
212 Ident(ast::Ident, /* is_raw */ bool),
213 Lifetime(ast::Ident),
// A fully parsed AST fragment spliced in by macro expansion; ref-counted
// because nonterminals are cloned freely during expansion.
215 Interpolated(Lrc<Nonterminal>),
217 // Can be expanded into several tokens.
219 DocComment(ast::Name),
221 // Junk. These carry no data because we don't really care about the data
222 // they *would* carry, and don't really want to allocate a new ident for
223 // them. Instead, users could extract that from the associated span.
234 // `TokenKind` is used a lot. Make sure it doesn't unintentionally get bigger.
// Compile-time size guard (x86_64 only): keeps `TokenKind` at 16 bytes.
235 #[cfg(target_arch = "x86_64")]
236 static_assert_size!(TokenKind, 16);
// NOTE(review): the struct/impl headers between these fragments
// (orig. lines 239-244, 248-249, 251, 253-256) are elided.
238 #[derive(Clone, Debug)]
245 /// Recovers a `TokenKind` from an `ast::Ident`. This creates a raw identifier if necessary.
246 pub fn from_ast_ident(ident: ast::Ident) -> TokenKind {
// `is_raw_guess()` decides whether the ident must be spelled `r#...`.
247 Ident(ident, ident.is_raw_guess())
// True for `+` and `+=` — tokens that read like a plus in diagnostics.
250 crate fn is_like_plus(&self) -> bool {
252 BinOp(Plus) | BinOpEq(Plus) => true,
// NOTE(review): fragment — the interpolated-nonterminal arms at orig.
// lines 277-281 and the closing arms (orig. 283-287) are elided.
257 /// Returns `true` if the token can appear at the start of an expression.
258 crate fn can_begin_expr(&self) -> bool {
// Identifiers delegate to the free function above (keywords vs. names).
260 Ident(ident, is_raw) =>
261 ident_can_begin_expr(ident, is_raw), // value name or keyword
262 OpenDelim(..) | // tuple, array or block
263 Literal(..) | // literal
264 Not | // operator not
265 BinOp(Minus) | // unary minus
266 BinOp(Star) | // dereference
267 BinOp(Or) | OrOr | // closure
268 BinOp(And) | // reference
269 AndAnd | // double reference
270 // DotDotDot is no longer supported, but we need some way to display the error
271 DotDot | DotDotDot | DotDotEq | // range notation
272 Lt | BinOp(Shl) | // associated path
273 ModSep | // global path
274 Lifetime(..) | // labeled loop
275 Pound => true, // expression attributes
// For interpolated fragments, only expression-like nonterminals qualify.
276 Interpolated(ref nt) => match **nt {
282 NtLifetime(..) => true,
// NOTE(review): fragment — arms at orig. lines 291, 296, 306-310 are elided.
289 /// Returns `true` if the token can appear at the start of a type.
290 crate fn can_begin_type(&self) -> bool {
292 Ident(ident, is_raw) =>
293 ident_can_begin_type(ident, is_raw), // type name or keyword
294 OpenDelim(Paren) | // tuple
295 OpenDelim(Bracket) | // array
297 BinOp(Star) | // raw pointer
298 BinOp(And) | // reference
299 AndAnd | // double reference
300 Question | // maybe bound in trait object
301 Lifetime(..) | // lifetime bound in trait object
302 Lt | BinOp(Shl) | // associated path
303 ModSep => true, // global path
// Only type-shaped interpolated fragments can begin a type.
304 Interpolated(ref nt) => match **nt {
305 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
// NOTE(review): fragment — arms at orig. lines 314, 317-318, 320-321 and
// closing lines are elided.
312 /// Returns `true` if the token can appear at the start of a const param.
// A const argument may be a `{ ... }` block, an interpolated literal-like
// nonterminal, or anything `can_begin_literal_or_bool` accepts.
313 pub fn can_begin_const_arg(&self) -> bool {
315 OpenDelim(Brace) => true,
316 Interpolated(ref nt) => match **nt {
319 NtLiteral(..) => true,
322 _ => self.can_begin_literal_or_bool(),
326 /// Returns `true` if the token can appear at the start of a generic bound.
// Bounds start with a path, a lifetime, `for<...>`, `?Trait`, or `(`.
327 crate fn can_begin_bound(&self) -> bool {
328 self.is_path_start() || self.is_lifetime() || self.is_keyword(kw::For) ||
329 self == &Question || self == &OpenDelim(Paren)
// Convenience constructor: wrap a `Lit` in the `Literal` token variant.
332 pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
333 Literal(Lit::new(kind, symbol, suffix))
// NOTE(review): fragment — the bodies of `is_lit`/`expect_lit` (orig. 338-346)
// and closing arms of the match (orig. 361-365) are elided.
336 /// Returns `true` if the token is any literal
337 crate fn is_lit(&self) -> bool {
// Extracts the `Lit` payload; panics if called on a non-`Literal` token,
// so callers must check `is_lit` (or match) first.
344 crate fn expect_lit(&self) -> Lit {
347 _=> panic!("`expect_lit` called on non-literal"),
351 /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
352 /// for example a '-42', or one of the boolean idents).
353 crate fn can_begin_literal_or_bool(&self) -> bool {
356 BinOp(Minus) => true,
// `true`/`false` are keywords lexed as (non-raw) identifiers, hence the
// explicit name checks; raw `r#true` does not count.
357 Ident(ident, false) if ident.name == kw::True => true,
358 Ident(ident, false) if ident.name == kw::False => true,
359 Interpolated(ref nt) => match **nt {
360 NtLiteral(..) => true,
// NOTE(review): fragment — fall-through `None` arms and closing braces
// (orig. 373-377, 384-388, 392, 396) are elided.
367 /// Returns an identifier if this token is an identifier.
// Also looks through `Interpolated(NtIdent)` so macro-expanded identifiers
// behave like plain ones; the bool is the `is_raw` flag.
368 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
370 Ident(ident, is_raw) => Some((ident, is_raw)),
371 Interpolated(ref nt) => match **nt {
372 NtIdent(ident, is_raw) => Some((ident, is_raw)),
378 /// Returns a lifetime identifier if this token is a lifetime.
// Symmetric with `ident`: also looks through `Interpolated(NtLifetime)`.
379 pub fn lifetime(&self) -> Option<ast::Ident> {
381 Lifetime(ident) => Some(ident),
382 Interpolated(ref nt) => match **nt {
383 NtLifetime(ident) => Some(ident),
389 /// Returns `true` if the token is an identifier.
390 pub fn is_ident(&self) -> bool {
391 self.ident().is_some()
393 /// Returns `true` if the token is a lifetime.
394 crate fn is_lifetime(&self) -> bool {
395 self.lifetime().is_some()
// NOTE(review): fragment — closing arms/braces (orig. 399, 401, 403-406,
// 411-416, 421-422, 425-426, 430) are elided.
398 /// Returns `true` if the token is an identifier whose name is the given
400 crate fn is_ident_named(&self, name: Symbol) -> bool {
402 Some((ident, _)) => ident.name == name,
407 /// Returns `true` if the token is an interpolated path.
// Note: this checks ONLY `Interpolated(NtPath)`, not ordinary path tokens.
408 fn is_path(&self) -> bool {
409 if let Interpolated(ref nt) = *self {
410 if let NtPath(..) = **nt {
417 /// Returns `true` if the token is either the `mut` or `const` keyword.
418 crate fn is_mutability(&self) -> bool {
419 self.is_keyword(kw::Mut) ||
420 self.is_keyword(kw::Const)
// Qualified paths (`<T as Trait>::...`) start with `<`, or `<<` after gluing.
423 crate fn is_qpath_start(&self) -> bool {
424 self == &Lt || self == &BinOp(Shl)
// Anything that can start a path: `::`, a qpath, an interpolated path,
// a path-segment keyword, or a non-reserved identifier.
427 crate fn is_path_start(&self) -> bool {
428 self == &ModSep || self.is_qpath_start() || self.is_path() ||
429 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
// NOTE(review): fragment — each predicate's `match self.ident()` header,
// `None`/raw arms, and closing braces are elided (orig. 435-436, 438,
// 440-443, 447, 449-452, 455, 457-460, 463, 465-468, 471, 473-475).
// All of them deliberately require `is_raw == false`: raw identifiers
// (`r#fn`) never count as keywords.
432 /// Returns `true` if the token is a given keyword, `kw`.
433 pub fn is_keyword(&self, kw: Symbol) -> bool {
434 self.ident().map(|(ident, is_raw)| ident.name == kw && !is_raw).unwrap_or(false)
437 pub fn is_path_segment_keyword(&self) -> bool {
439 Some((id, false)) => id.is_path_segment_keyword(),
444 // Returns true for reserved identifiers used internally for elided lifetimes,
445 // unnamed method parameters, crate root module, error recovery etc.
446 pub fn is_special_ident(&self) -> bool {
448 Some((id, false)) => id.is_special(),
453 /// Returns `true` if the token is a keyword used in the language.
454 crate fn is_used_keyword(&self) -> bool {
456 Some((id, false)) => id.is_used_keyword(),
461 /// Returns `true` if the token is a keyword reserved for possible future use.
462 crate fn is_unused_keyword(&self) -> bool {
464 Some((id, false)) => id.is_unused_keyword(),
469 /// Returns `true` if the token is either a special identifier or a keyword.
470 pub fn is_reserved_ident(&self) -> bool {
472 Some((id, false)) => id.is_reserved(),
// NOTE(review): heavily elided fragment — most gluing arms (orig. 478-487,
// 489-500, 502, 506-512, 514-517, 519-521, 526-532) are missing, including
// the match headers that the visible arms belong to. Read with care.
// Joins `self` with an adjacent token `joint` into a single multi-character
// token (e.g. `<` + `-` => `<-`), or returns `None` if they don't combine.
477 crate fn glue(self, joint: TokenKind) -> Option<TokenKind> {
488 BinOp(Minus) => LArrow,
501 BinOp(op) => match joint {
503 BinOp(And) if op == And => AndAnd,
504 BinOp(Or) if op == Or => OrOr,
505 Gt if op == Minus => RArrow,
513 DotDot => match joint {
518 Colon => match joint {
// `'` + identifier glues into a lifetime token, interning a fresh
// `'name` symbol.
522 SingleQuote => match joint {
523 Ident(ident, false) => {
524 let name = Symbol::intern(&format!("'{}", ident));
525 Lifetime(symbol::Ident {
// All already-complete tokens refuse to glue with anything.
533 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot |
534 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
535 Question | OpenDelim(..) | CloseDelim(..) |
536 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
537 Whitespace | Comment | Shebang(..) | Eof => return None,
// NOTE(review): fragment — the match header and fall-through arm
// (orig. 544, 547-549) are elided.
541 /// Returns tokens that are likely to be typed accidentally instead of the current token.
542 /// Enables better error recovery when the wrong token is found.
543 crate fn similar_tokens(&self) -> Option<Vec<TokenKind>> {
545 Comma => Some(vec![Dot, Lt, Semi]),
546 Semi => Some(vec![Colon, Comma]),
// NOTE(review): fragment — the early-return body (orig. 555-556) and many
// unit-variant arms (orig. 558-571, 574-579, 581-582) are elided.
551 // See comments in `Nonterminal::to_tokenstream` for why we care about
552 // *probably* equal here rather than actual equality
// Span-insensitive comparison of token kinds, used to decide whether a
// cached token stream still matches a re-stringified one.
553 crate fn probably_equal_for_proc_macro(&self, other: &TokenKind) -> bool {
554 if mem::discriminant(self) != mem::discriminant(other) {
557 match (self, other) {
572 (&DotDotDot, &DotDotDot) |
573 (&DotDotEq, &DotDotEq) |
580 (&FatArrow, &FatArrow) |
583 (&Question, &Question) |
584 (&Whitespace, &Whitespace) |
585 (&Comment, &Comment) |
586 (&Eof, &Eof) => true,
588 (&BinOp(a), &BinOp(b)) |
589 (&BinOpEq(a), &BinOpEq(b)) => a == b,
591 (&OpenDelim(a), &OpenDelim(b)) |
592 (&CloseDelim(a), &CloseDelim(b)) => a == b,
594 (&DocComment(a), &DocComment(b)) |
595 (&Shebang(a), &Shebang(b)) => a == b,
597 (&Literal(a), &Literal(b)) => a == b,
// Lifetimes/idents compare by name only, ignoring hygiene/span; any ident
// additionally matches when either side is `$crate`.
599 (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
600 (&Ident(a, b), &Ident(c, d)) => b == d && (a.name == c.name ||
601 a.name == kw::DollarCrate ||
602 c.name == kw::DollarCrate),
// Interpolated fragments are conservatively treated as never equal.
604 (&Interpolated(_), &Interpolated(_)) => false,
// Discriminants matched above, so reaching here means a variant was
// added without updating this function.
606 _ => panic!("forgot to add a token?"),
// NOTE(review): fragment — the comparison body (orig. 613-615) is elided;
// presumably it compares `self`'s kind against `rhs` — TODO confirm.
611 impl PartialEq<TokenKind> for Token {
612 fn eq(&self, rhs: &TokenKind) -> bool {
// NOTE(review): fragment — variants at orig. lines 622-623, 625, 631, 633
// (presumably NtStmt/NtPat/NtTy/NtPath/NtTT per the Debug impl below) are
// elided from this view.
617 #[derive(Clone, RustcEncodable, RustcDecodable)]
618 /// For interpolation during macro expansion.
619 pub enum Nonterminal {
620 NtItem(P<ast::Item>),
621 NtBlock(P<ast::Block>),
624 NtExpr(P<ast::Expr>),
626 NtIdent(ast::Ident, /* is_raw */ bool),
627 NtLifetime(ast::Ident),
628 NtLiteral(P<ast::Expr>),
629 /// Stuff inside brackets for attributes
630 NtMeta(ast::MetaItem),
632 NtVis(ast::Visibility),
634 // Used only for passing items to proc macro attributes (they are not
635 // strictly necessary for that, `Annotatable` can be converted into
636 // tokens directly, but doing that naively regresses pretty-printing).
637 NtTraitItem(ast::TraitItem),
638 NtImplItem(ast::ImplItem),
639 NtForeignItem(ast::ForeignItem),
// NOTE(review): fragment — the match header (orig. 644) and the catch-all
// `false` arm plus closing braces (orig. 653-656) are elided.
642 impl PartialEq for Nonterminal {
643 fn eq(&self, rhs: &Self) -> bool {
// Only "token-like" nonterminals can be compared structurally.
645 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
646 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
647 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
648 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
649 // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
650 // correctly based on data from AST. This will prevent them from matching each other
651 // in macros. The comparison will become possible only when each nonterminal has an
652 // attached token stream from which it was parsed.
// Hand-written Debug: prints only the variant name, since the AST payloads
// are large and their Debug output would be overwhelming.
658 impl fmt::Debug for Nonterminal {
659 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
661 NtItem(..) => f.pad("NtItem(..)"),
662 NtBlock(..) => f.pad("NtBlock(..)"),
663 NtStmt(..) => f.pad("NtStmt(..)"),
664 NtPat(..) => f.pad("NtPat(..)"),
665 NtExpr(..) => f.pad("NtExpr(..)"),
666 NtTy(..) => f.pad("NtTy(..)"),
667 NtIdent(..) => f.pad("NtIdent(..)"),
668 NtLiteral(..) => f.pad("NtLiteral(..)"),
669 NtMeta(..) => f.pad("NtMeta(..)"),
670 NtPath(..) => f.pad("NtPath(..)"),
671 NtTT(..) => f.pad("NtTT(..)"),
672 NtImplItem(..) => f.pad("NtImplItem(..)"),
673 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
674 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
675 NtVis(..) => f.pad("NtVis(..)"),
676 NtLifetime(..) => f.pad("NtLifetime(..)"),
// NOTE(review): fragment — several lines (orig. 686, 690-691, 698, 701, 704,
// 708, 712, 715-718, 723, 730, 735-736, 741, 749-750, 753) are elided,
// including the fall-through arm of the match and the cached-tokens return.
// Converts this parsed AST fragment back into a token stream, preferring a
// cached lossless stream and falling back to stringify-and-reparse.
682 pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
683 // A `Nonterminal` is often a parsed AST item. At this point we now
684 // need to convert the parsed AST to an actual token stream, e.g.
685 // un-parse it basically.
687 // Unfortunately there's not really a great way to do that in a
688 // guaranteed lossless fashion right now. The fallback here is to just
689 // stringify the AST node and reparse it, but this loses all span
692 // As a result, some AST nodes are annotated with the token stream they
693 // came from. Here we attempt to extract these lossless token streams
694 // before we fall back to the stringification.
695 let tokens = match *self {
696 Nonterminal::NtItem(ref item) => {
697 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
699 Nonterminal::NtTraitItem(ref item) => {
700 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
702 Nonterminal::NtImplItem(ref item) => {
703 prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
// Idents/lifetimes/token-trees convert losslessly without any reparse.
705 Nonterminal::NtIdent(ident, is_raw) => {
706 let token = Ident(ident, is_raw);
707 Some(TokenTree::Token(ident.span, token).into())
709 Nonterminal::NtLifetime(ident) => {
710 let token = Lifetime(ident);
711 Some(TokenTree::Token(ident.span, token).into())
713 Nonterminal::NtTT(ref tt) => {
714 Some(tt.clone().into())
719 // FIXME(#43081): Avoid this pretty-print + reparse hack
720 let source = pprust::nonterminal_to_string(self);
721 let filename = FileName::macro_expansion_source_code(&source);
722 let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
724 // During early phases of the compiler the AST could get modified
725 // directly (e.g., attributes added or removed) and the internal cache
726 // of tokens may not be invalidated or updated. Consequently if the
727 // "lossless" token stream disagrees with our actual stringification
728 // (which has historically been much more battle-tested) then we go
729 // with the lossy stream anyway (losing span information).
731 // Note that the comparison isn't `==` here to avoid comparing spans,
732 // but it *also* is a "probable" equality which is a pretty weird
733 // definition. We mostly want to catch actual changes to the AST
734 // like a `#[cfg]` being processed or some weird `macro_rules!`
737 // What we *don't* want to catch is the fact that a user-defined
738 // literal like `0xf` is stringified as `15`, causing the cached token
739 // stream to not be literal `==` token-wise (ignoring spans) to the
740 // token stream we got from stringification.
742 // Instead the "probably equal" check here is "does each token
743 // recursively have the same discriminant?" We basically don't look at
744 // the token values here and assume that such fine grained token stream
745 // modifications, including adding/removing typically non-semantic
746 // tokens such as extra braces and commas, don't happen.
747 if let Some(tokens) = tokens {
748 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
751 info!("cached tokens found, but they're not \"probably equal\", \
752 going with stringified version");
753 return tokens_for_real
// NOTE(review): fragment — the match header and the `_ => true` fall-through
// plus closing braces (orig. 759, 763-765) are elided.
// Everything that is NOT an atom (delimiter, literal, name, comment, junk)
// counts as an operator token.
758 crate fn is_op(tok: &TokenKind) -> bool {
760 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
761 Ident(..) | Lifetime(..) | Interpolated(..) |
762 Whitespace | Comment | Shebang(..) | Eof => false,
// NOTE(review): fragment — the loop header over `attrs` (orig. 778), the
// `continue`/`else` glue between branches (orig. 787-789, 798, 801, 804-805,
// 807), and closing braces are elided.
// Rebuilds a token stream for an item by emitting its outer attributes as
// tokens in front of the item's cached token stream. Returns `None` when the
// item has no cached tokens at all.
767 fn prepend_attrs(sess: &ParseSess,
768 attrs: &[ast::Attribute],
769 tokens: Option<&tokenstream::TokenStream>,
770 span: syntax_pos::Span)
771 -> Option<tokenstream::TokenStream>
773 let tokens = tokens?;
774 if attrs.len() == 0 {
775 return Some(tokens.clone())
777 let mut builder = tokenstream::TokenStreamBuilder::new();
// Inner attributes can't be represented in front of the item, and per the
// assertion the cache is never populated when they exist.
779 assert_eq!(attr.style, ast::AttrStyle::Outer,
780 "inner attributes should prevent cached tokens from existing");
782 let source = pprust::attr_to_string(attr);
783 let macro_filename = FileName::macro_expansion_source_code(&source);
// Sugared doc comments (`/// ...`) are stringified and reparsed wholesale.
784 if attr.is_sugared_doc {
785 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
786 builder.push(stream);
790 // synthesize # [ $path $tokens ] manually here
791 let mut brackets = tokenstream::TokenStreamBuilder::new();
793 // For simple paths, push the identifier directly
794 if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
795 let ident = attr.path.segments[0].ident;
// The `r#` prefix check recovers the raw-identifier flag from the name.
796 let token = Ident(ident, ident.as_str().starts_with("r#"));
797 brackets.push(tokenstream::TokenTree::Token(ident.span, token));
799 // ... and for more complicated paths, fall back to a reparse hack that
800 // should eventually be removed.
802 let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
803 brackets.push(stream);
806 brackets.push(attr.tokens.clone());
808 // The span we list here for `#` and for `[ ... ]` are both wrong in
809 // that it encompasses more than each token, but it hopefully is "good
810 // enough" for now at least.
811 builder.push(tokenstream::TokenTree::Token(attr.span, Pound));
812 let delim_span = DelimSpan::from_single(attr.span);
813 builder.push(tokenstream::TokenTree::Delimited(
814 delim_span, DelimToken::Bracket, brackets.build().into()));
// Finally the item's own cached tokens follow the synthesized attributes.
816 builder.push(tokens.clone());
817 Some(builder.build())