1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 pub use self::BinOpToken::*;
12 pub use self::Nonterminal::*;
13 pub use self::DelimToken::*;
15 pub use self::Token::*;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use syntax::parse::parse_stream_from_source_str;
24 use syntax_pos::{self, Span, FileName};
25 use syntax_pos::symbol::{self, Symbol};
26 use tokenstream::{self, DelimSpan, TokenStream, TokenTree};
30 use rustc_data_structures::sync::{Lrc, Lock};
32 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
// Delimiter tokens (`()`, `[]`, `{}`, and the invisible "no delimiter").
// NOTE(review): this numbered dump elides lines — the `pub enum DelimToken {`
// header, the variant identifiers (presumably Paren/Bracket/Brace/NoDelim),
// and the `impl` header are missing between the lines below; confirm against
// the full source.
47 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
49 /// A round parenthesis: `(` or `)`
51 /// A square bracket: `[` or `]`
53 /// A curly brace: `{` or `}`
55 /// An empty delimiter
// Number of source characters one side of this delimiter occupies:
// zero for the invisible `NoDelim`, one for every real delimiter.
60 pub fn len(self) -> usize {
61 if self == NoDelim { 0 } else { 1 }
// Body elided in this dump — presumably `self == NoDelim` (i.e. `len() == 0`);
// TODO confirm against the full source.
64 pub fn is_empty(self) -> bool {
// Literal token kinds carried by `Token::Literal`.
// NOTE(review): the `pub enum Lit {` header and the Byte/Char/Integer/Float/
// Str_/ByteStr variant declarations are elided from this dump; only the
// raw-string variants are visible below.
69 #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
76 StrRaw(ast::Name, u16), /* raw str delimited by n hash symbols */
78 ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */
// Human-readable name of this literal kind, for use in diagnostics.
// (The `match self {` line and closing braces are elided here.)
82 crate fn literal_name(&self) -> &'static str {
84 Byte(_) => "byte literal",
85 Char(_) => "char literal",
86 Integer(_) => "integer literal",
87 Float(_) => "float literal",
88 Str_(_) | StrRaw(..) => "string literal",
89 ByteStr(_) | ByteStrRaw(..) => "byte string literal"
93 // See comments in `interpolated_to_tokenstream` for why we care about
94 // *probably* equal here rather than actual equality
// Two literals are "probably equal" when they are the same enum variant;
// the payloads (actual text/values) are deliberately ignored, so e.g. two
// different integer literals compare as probably-equal.
95 fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
96 mem::discriminant(self) == mem::discriminant(other)
// Returns `true` if `ident` may legally start an expression: any
// non-reserved identifier, any path-segment keyword, or one of the
// expression-starting keywords whitelisted in the array below.
// NOTE(review): this dump elides lines 102, 105, 107 and 113 — the array's
// opening `[` and some whitelist entries (likely `Do`/`If`) are missing;
// confirm the full list against the real source.
100 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
101 let ident_token: Token = Ident(ident, is_raw);
103 !ident_token.is_reserved_ident() ||
104 ident_token.is_path_segment_keyword() ||
106 keywords::Async.name(),
108 keywords::Box.name(),
109 keywords::Break.name(),
110 keywords::Continue.name(),
111 keywords::False.name(),
112 keywords::For.name(),
114 keywords::Loop.name(),
115 keywords::Match.name(),
116 keywords::Move.name(),
117 keywords::Return.name(),
118 keywords::True.name(),
119 keywords::Unsafe.name(),
120 keywords::While.name(),
121 keywords::Yield.name(),
122 keywords::Static.name(),
123 ].contains(&ident.name)
// Returns `true` if `ident` may legally start a type: any non-reserved
// identifier, any path-segment keyword, or one of the type-starting keywords
// whitelisted below (`_`, `for`, `impl`, `unsafe`, `extern`, `typeof`, `dyn`,
// plus at least one elided entry).
// NOTE(review): lines 128, 131 and 135 are elided from this dump — the
// array's opening `[` and one whitelist entry (likely `Fn`) are missing.
126 fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
127 let ident_token: Token = Ident(ident, is_raw);
129 !ident_token.is_reserved_ident() ||
130 ident_token.is_path_segment_keyword() ||
132 keywords::Underscore.name(),
133 keywords::For.name(),
134 keywords::Impl.name(),
136 keywords::Unsafe.name(),
137 keywords::Extern.name(),
138 keywords::Typeof.name(),
139 keywords::Dyn.name(),
140 ].contains(&ident.name)
// The lexer's token type.
// NOTE(review): the `pub enum Token {` header and most operator/structural
// variants (Eq, Lt, BinOp, Dot, Comma, Semi, ... per the section comments)
// are elided from this dump; only the variants listed below are visible.
143 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Debug)]
145 /* Expression-operator symbols. */
160 /* Structural symbols */
176 /// Used by proc macros for representing lifetimes, not generated by lexer right now.
178 /// An opening delimiter, eg. `{`
179 OpenDelim(DelimToken),
180 /// A closing delimiter, eg. `}`
181 CloseDelim(DelimToken),
// A literal plus an optional suffix name (e.g. the `i32` in `1i32`).
184 Literal(Lit, Option<ast::Name>),
186 /* Name components */
187 Ident(ast::Ident, /* is_raw */ bool),
188 Lifetime(ast::Ident),
190 // The `LazyTokenStream` is a pure function of the `Nonterminal`,
191 // and so the `LazyTokenStream` can be ignored by Eq, Hash, etc.
192 Interpolated(Lrc<(Nonterminal, LazyTokenStream)>),
193 // Can be expanded into several tokens.
195 DocComment(ast::Name),
197 // Junk. These carry no data because we don't really care about the data
198 // they *would* carry, and don't really want to allocate a new ident for
199 // them. Instead, users could extract that from the associated span.
// NOTE(review): the `impl Token {` header (around original line 209) and the
// closing braces of each method below are elided from this dump.
// Wraps a `Nonterminal` into an `Interpolated` token, pairing it with an
// empty `LazyTokenStream` cache.
211 pub fn interpolated(nt: Nonterminal) -> Token {
212 Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))
215 /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
216 pub fn from_ast_ident(ident: ast::Ident) -> Token {
217 Ident(ident, ident.is_raw_guess())
// True for `+` and `+=` — used where the parser accepts "plus-like" tokens.
// (The `match self {` line and the fallthrough arm are elided here.)
220 crate fn is_like_plus(&self) -> bool {
222 BinOp(Plus) | BinOpEq(Plus) => true,
227 /// Returns `true` if the token can appear at the start of an expression.
228 crate fn can_begin_expr(&self) -> bool {
230 Ident(ident, is_raw) =>
231 ident_can_begin_expr(ident, is_raw), // value name or keyword
232 OpenDelim(..) | // tuple, array or block
233 Literal(..) | // literal
234 Not | // operator not
235 BinOp(Minus) | // unary minus
236 BinOp(Star) | // dereference
237 BinOp(Or) | OrOr | // closure
238 BinOp(And) | // reference
239 AndAnd | // double reference
240 // DotDotDot is no longer supported, but we need some way to display the error
241 DotDot | DotDotDot | DotDotEq | // range notation
242 Lt | BinOp(Shl) | // associated path
243 ModSep | // global path
244 Lifetime(..) | // labeled loop
245 Pound => true, // expression attributes
// For interpolated tokens, delegate to the kind of nonterminal inside.
// NOTE(review): the other `Nt*` arms of this inner match (lines 247-251)
// and the `false` fallthrough are elided from this dump.
246 Interpolated(ref nt) => match nt.0 {
252 NtLifetime(..) => true,
259 /// Returns `true` if the token can appear at the start of a type.
// (The `match *self {` line and several arms/closing braces are elided in
// this dump.)
260 crate fn can_begin_type(&self) -> bool {
262 Ident(ident, is_raw) =>
263 ident_can_begin_type(ident, is_raw), // type name or keyword
264 OpenDelim(Paren) | // tuple
265 OpenDelim(Bracket) | // array
267 BinOp(Star) | // raw pointer
268 BinOp(And) | // reference
269 AndAnd | // double reference
270 Question | // maybe bound in trait object
271 Lifetime(..) | // lifetime bound in trait object
272 Lt | BinOp(Shl) | // associated path
273 ModSep => true, // global path
274 Interpolated(ref nt) => match nt.0 {
275 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
282 /// Returns `true` if the token can appear at the start of a generic bound.
// Bounds start with a path, a lifetime, `for`, `?`, or a parenthesized bound.
283 crate fn can_begin_bound(&self) -> bool {
284 self.is_path_start() || self.is_lifetime() || self.is_keyword(keywords::For) ||
285 self == &Question || self == &OpenDelim(Paren)
288 /// Returns `true` if the token is any literal
// Body elided in this dump — presumably matches `Literal(..)`; TODO confirm.
289 crate fn is_lit(&self) -> bool {
296 /// Returns `true` if the token is any literal, a minus (which can follow a literal,
297 /// for example a '-42', or one of the boolean idents).
// (The `match *self {` line, a `Literal(..)` arm, and the fallthrough are
// elided in this dump.)
298 crate fn can_begin_literal_or_bool(&self) -> bool {
301 BinOp(Minus) => true,
302 Ident(ident, false) if ident.name == keywords::True.name() => true,
303 Ident(ident, false) if ident.name == keywords::False.name() => true,
304 Interpolated(ref nt) => match nt.0 {
305 NtLiteral(..) => true,
312 /// Returns an identifier if this token is an identifier.
// Handles both a plain `Ident` token and an interpolated `NtIdent`; the
// `None` fallthrough arms and closing braces are elided from this dump.
313 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
315 Ident(ident, is_raw) => Some((ident, is_raw)),
316 Interpolated(ref nt) => match nt.0 {
317 NtIdent(ident, is_raw) => Some((ident, is_raw)),
323 /// Returns a lifetime identifier if this token is a lifetime.
// Same two-level structure as `ident()` above, for `Lifetime`/`NtLifetime`.
324 pub fn lifetime(&self) -> Option<ast::Ident> {
326 Lifetime(ident) => Some(ident),
327 Interpolated(ref nt) => match nt.0 {
328 NtLifetime(ident) => Some(ident),
334 /// Returns `true` if the token is an identifier.
335 pub fn is_ident(&self) -> bool {
336 self.ident().is_some()
338 /// Returns `true` if the token is a lifetime.
339 crate fn is_lifetime(&self) -> bool {
340 self.lifetime().is_some()
343 /// Returns `true` if the token is a identifier whose name is the given
// Compares the identifier's interned string against `name`; raw-ness is
// ignored. (The `match self.ident() {` line and `None` arm are elided.)
345 crate fn is_ident_named(&self, name: &str) -> bool {
347 Some((ident, _)) => ident.as_str() == name,
352 /// Returns `true` if the token is an interpolated path.
// (The `return true`/`false` tail of this function is elided in this dump.)
353 fn is_path(&self) -> bool {
354 if let Interpolated(ref nt) = *self {
355 if let NtPath(..) = nt.0 {
362 /// Returns `true` if the token is either the `mut` or `const` keyword.
363 crate fn is_mutability(&self) -> bool {
364 self.is_keyword(keywords::Mut) ||
365 self.is_keyword(keywords::Const)
// A qualified path starts with `<` — or `<<`, since the lexer may have glued
// two `<`s into a shift token.
368 crate fn is_qpath_start(&self) -> bool {
369 self == &Lt || self == &BinOp(Shl)
// Anything that can start a path: `::`, a qpath `<`, an interpolated path,
// a path-segment keyword, or a plain non-reserved identifier.
372 crate fn is_path_start(&self) -> bool {
373 self == &ModSep || self.is_qpath_start() || self.is_path() ||
374 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
377 /// Returns `true` if the token is a given keyword, `kw`.
// A raw identifier (`r#for`) never counts as the keyword — note the `!is_raw`.
378 pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
379 self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false)
// Each predicate below follows the same shape: extract a non-raw identifier
// via `self.ident()` and delegate to the corresponding `ast::Ident` method.
// The `match` headers and `_ => false` fallthrough arms are elided in this
// dump.
382 pub fn is_path_segment_keyword(&self) -> bool {
384 Some((id, false)) => id.is_path_segment_keyword(),
389 // Returns true for reserved identifiers used internally for elided lifetimes,
390 // unnamed method parameters, crate root module, error recovery etc.
391 pub fn is_special_ident(&self) -> bool {
393 Some((id, false)) => id.is_special(),
398 /// Returns `true` if the token is a keyword used in the language.
399 crate fn is_used_keyword(&self) -> bool {
401 Some((id, false)) => id.is_used_keyword(),
406 /// Returns `true` if the token is a keyword reserved for possible future use.
407 crate fn is_unused_keyword(&self) -> bool {
409 Some((id, false)) => id.is_unused_keyword(),
414 /// Returns `true` if the token is either a special identifier or a keyword.
415 pub fn is_reserved_ident(&self) -> bool {
417 Some((id, false)) => id.is_reserved(),
// Attempts to merge `self` with an immediately-following token into a single
// compound token (e.g. `<` + `-` → `<-`, `&` + `&` → `&&`), returning `None`
// when the pair does not form one.
// NOTE(review): this dump elides most of the body — the outer `match self`
// header, the `Eq`/`Lt`/`Gt`/`Not`/`Tilde` arms, the inner-match tails, and
// all closing braces are missing. The surviving lines are annotated below.
422 crate fn glue(self, joint: Token) -> Option<Token> {
// (From an elided enclosing arm, visibly `Lt`-like:) `<` + `-` → `<-`.
433 BinOp(Minus) => LArrow,
434 // wait — see NOTE above; arm context cannot be confirmed from this dump.
446 BinOp(op) => match joint {
448 BinOp(And) if op == And => AndAnd,
449 BinOp(Or) if op == Or => OrOr,
450 Gt if op == Minus => RArrow,
458 DotDot => match joint {
463 Colon => match joint {
// `'` followed by an identifier re-lexes as a lifetime token.
467 SingleQuote => match joint {
468 Ident(ident, false) => {
469 let name = Symbol::intern(&format!("'{}", ident));
470 Lifetime(symbol::Ident {
// These token kinds never glue with anything:
478 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot |
479 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
480 Question | OpenDelim(..) | CloseDelim(..) => return None,
482 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
483 Whitespace | Comment | Shebang(..) | Eof => return None,
487 /// Returns tokens that are likely to be typed accidentally instead of the current token.
488 /// Enables better error recovery when the wrong token is found.
// (The `match *self {` line and the `_ => None` fallthrough are elided in
// this dump.)
489 crate fn similar_tokens(&self) -> Option<Vec<Token>> {
491 Comma => Some(vec![Dot, Lt]),
492 Semi => Some(vec![Colon]),
// Converts an `Interpolated` token's `Nonterminal` back into a `TokenStream`:
// prefer a cached, span-preserving ("lossless") stream attached to the AST
// node, validate it against a pretty-print + reparse ("lossy") stream, and
// fall back to the lossy stream when they disagree.
// NOTE(review): the return type (line 498/499), the `match nt.0 {` header,
// several `Nt*` arms, the reparse closure's tail, and the final return of
// the cached `tokens` are elided from this dump.
// Panics if `self` is not `Token::Interpolated`.
497 pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span)
500 let nt = match *self {
501 Token::Interpolated(ref nt) => nt,
502 _ => panic!("only works on interpolated tokens"),
505 // An `Interpolated` token means that we have a `Nonterminal`
506 // which is often a parsed AST item. At this point we now need
507 // to convert the parsed AST to an actual token stream, e.g.
508 // un-parse it basically.
510 // Unfortunately there's not really a great way to do that in a
511 // guaranteed lossless fashion right now. The fallback here is
512 // to just stringify the AST node and reparse it, but this loses
513 // all span information.
515 // As a result, some AST nodes are annotated with the token
516 // stream they came from. Here we attempt to extract these
517 // lossless token streams before we fall back to the
519 let mut tokens = None;
// Items carry their attributes separately from their cached stream, so the
// attribute tokens must be re-synthesized in front via `prepend_attrs`.
522 Nonterminal::NtItem(ref item) => {
523 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
525 Nonterminal::NtTraitItem(ref item) => {
526 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
528 Nonterminal::NtImplItem(ref item) => {
529 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
// Idents, lifetimes and token trees convert exactly — no reparse needed.
531 Nonterminal::NtIdent(ident, is_raw) => {
532 let token = Token::Ident(ident, is_raw);
533 tokens = Some(TokenTree::Token(ident.span, token).into());
535 Nonterminal::NtLifetime(ident) => {
536 let token = Token::Lifetime(ident);
537 tokens = Some(TokenTree::Token(ident.span, token).into());
539 Nonterminal::NtTT(ref tt) => {
540 tokens = Some(tt.clone().into());
// Compute (and memoize in the token's `LazyTokenStream`) the lossy
// stringify-and-reparse stream.
545 let tokens_for_real = nt.1.force(|| {
546 // FIXME(#43081): Avoid this pretty-print + reparse hack
547 let source = pprust::token_to_string(self);
548 parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
551 // During early phases of the compiler the AST could get modified
552 // directly (e.g. attributes added or removed) and the internal cache
553 // of tokens my not be invalidated or updated. Consequently if the
554 // "lossless" token stream disagrees with our actual stringification
555 // (which has historically been much more battle-tested) then we go
556 // with the lossy stream anyway (losing span information).
558 // Note that the comparison isn't `==` here to avoid comparing spans,
559 // but it *also* is a "probable" equality which is a pretty weird
560 // definition. We mostly want to catch actual changes to the AST
561 // like a `#[cfg]` being processed or some weird `macro_rules!`
564 // What we *don't* want to catch is the fact that a user-defined
565 // literal like `0xf` is stringified as `15`, causing the cached token
566 // stream to not be literal `==` token-wise (ignoring spans) to the
567 // token stream we got from stringification.
569 // Instead the "probably equal" check here is "does each token
570 // recursively have the same discriminant?" We basically don't look at
571 // the token values here and assume that such fine grained token stream
572 // modifications, including adding/removing typically non-semantic
573 // tokens such as extra braces and commas, don't happen.
574 if let Some(tokens) = tokens {
575 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
// (The `return tokens` for the agreeing-cache case is elided here.)
578 info!("cached tokens found, but they're not \"probably equal\", \
579 going with stringified version");
581 return tokens_for_real
584 // See comments in `interpolated_to_tokenstream` for why we care about
585 // *probably* equal here rather than actual equality
// Variant-discriminant comparison first; then a per-variant payload check
// that deliberately ignores spans. `Interpolated` always compares unequal.
// NOTE(review): the early `return false`, many unit-variant arms (Eq, Lt,
// etc.), and the closing braces are elided from this dump.
586 crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
587 if mem::discriminant(self) != mem::discriminant(other) {
590 match (self, other) {
605 (&DotDotDot, &DotDotDot) |
606 (&DotDotEq, &DotDotEq) |
613 (&FatArrow, &FatArrow) |
616 (&Question, &Question) |
617 (&Whitespace, &Whitespace) |
618 (&Comment, &Comment) |
619 (&Eof, &Eof) => true,
621 (&BinOp(a), &BinOp(b)) |
622 (&BinOpEq(a), &BinOpEq(b)) => a == b,
624 (&OpenDelim(a), &OpenDelim(b)) |
625 (&CloseDelim(a), &CloseDelim(b)) => a == b,
627 (&DocComment(a), &DocComment(b)) |
628 (&Shebang(a), &Shebang(b)) => a == b,
// Idents/lifetimes compare by interned name only — spans are ignored.
630 (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
631 (&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d,
// Literals: suffixes must match exactly; literal payloads only "probably".
633 (&Literal(ref a, b), &Literal(ref c, d)) => {
634 b == d && a.probably_equal_for_proc_macro(c)
// Two interpolated nonterminals are never assumed equal.
637 (&Interpolated(_), &Interpolated(_)) => false,
// The discriminant check above should have covered every mixed pair, so
// reaching here means a new variant was added without updating this match.
639 _ => panic!("forgot to add a token?"),
644 #[derive(Clone, RustcEncodable, RustcDecodable)]
645 /// For interpolation during macro expansion.
// NOTE(review): several variants are elided from this dump (between NtBlock
// and NtExpr — likely NtStmt/NtPat; after NtExpr — likely NtTy; after NtMeta
// — likely NtPath; and around the quasiquote group — likely NtTT/NtArm/NtArg
// per the Debug impl below).
646 pub enum Nonterminal {
647 NtItem(P<ast::Item>),
648 NtBlock(P<ast::Block>),
651 NtExpr(P<ast::Expr>),
653 NtIdent(ast::Ident, /* is_raw */ bool),
654 NtLifetime(ast::Ident),
655 NtLiteral(P<ast::Expr>),
656 /// Stuff inside brackets for attributes
657 NtMeta(ast::MetaItem),
659 NtVis(ast::Visibility),
661 // These are not exposed to macros, but are used by quasiquote.
663 NtImplItem(ast::ImplItem),
664 NtTraitItem(ast::TraitItem),
665 NtForeignItem(ast::ForeignItem),
666 NtGenerics(ast::Generics),
667 NtWhereClause(ast::WhereClause),
// Structural equality is only defined for the "simple" nonterminals; see the
// FIXME below for why everything else compares unequal.
// (The `match (self, rhs) {` header and the `_ => false` fallthrough are
// elided from this dump.)
671 impl PartialEq for Nonterminal {
672 fn eq(&self, rhs: &Self) -> bool {
674 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
675 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
676 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
677 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
678 // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
679 // correctly based on data from AST. This will prevent them from matching each other
680 // in macros. The comparison will become possible only when each nonterminal has an
681 // attached token stream from which it was parsed.
// Hand-written Debug that prints only the variant name — the AST payloads
// are large and not useful in debug output. (The `match *self {` header and
// closing braces are elided from this dump.)
687 impl fmt::Debug for Nonterminal {
688 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
690 NtItem(..) => f.pad("NtItem(..)"),
691 NtBlock(..) => f.pad("NtBlock(..)"),
692 NtStmt(..) => f.pad("NtStmt(..)"),
693 NtPat(..) => f.pad("NtPat(..)"),
694 NtExpr(..) => f.pad("NtExpr(..)"),
695 NtTy(..) => f.pad("NtTy(..)"),
696 NtIdent(..) => f.pad("NtIdent(..)"),
697 NtLiteral(..) => f.pad("NtLiteral(..)"),
698 NtMeta(..) => f.pad("NtMeta(..)"),
699 NtPath(..) => f.pad("NtPath(..)"),
700 NtTT(..) => f.pad("NtTT(..)"),
701 NtArm(..) => f.pad("NtArm(..)"),
702 NtImplItem(..) => f.pad("NtImplItem(..)"),
703 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
704 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
705 NtGenerics(..) => f.pad("NtGenerics(..)"),
706 NtWhereClause(..) => f.pad("NtWhereClause(..)"),
707 NtArg(..) => f.pad("NtArg(..)"),
708 NtVis(..) => f.pad("NtVis(..)"),
709 NtLifetime(..) => f.pad("NtLifetime(..)"),
// Classifies a token as an "operator" by exclusion: the listed non-operator
// kinds return `false`. (The `match *tok {` header and the `_ => true` arm
// for everything else are elided from this dump.)
714 crate fn is_op(tok: &Token) -> bool {
716 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
717 Ident(..) | Lifetime(..) | Interpolated(..) |
718 Whitespace | Comment | Shebang(..) | Eof => false,
// A thread-safe, lazily-computed cache of a `TokenStream` (`None` until
// first forced). All comparison/hash/serialization impls deliberately
// ignore the cached value, since it is a pure function of the owning
// `Nonterminal` (see the comment on `Token::Interpolated`).
724 pub struct LazyTokenStream(Lock<Option<TokenStream>>);
// Every two caches compare equal — the body (elided here) presumably
// returns `true`; TODO confirm.
726 impl cmp::Eq for LazyTokenStream {}
727 impl PartialEq for LazyTokenStream {
728 fn eq(&self, _other: &LazyTokenStream) -> bool {
// Debug-prints the cached `Option<TokenStream>` (clones to get past the lock).
733 impl fmt::Debug for LazyTokenStream {
734 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
735 fmt::Debug::fmt(&self.clone().0.into_inner(), f)
// NOTE(review): the `pub fn new(...)` signature line (740) is elided; only
// its body is visible.
739 impl LazyTokenStream {
741 LazyTokenStream(Lock::new(None))
// Returns the cached stream, computing and storing it via `f` on first call.
// The lock is held across `f()`, so `f` must not re-enter this cache.
744 fn force<F: FnOnce() -> TokenStream>(&self, f: F) -> TokenStream {
745 let mut opt_stream = self.0.lock();
746 if opt_stream.is_none() {
747 *opt_stream = Some(f());
749 opt_stream.clone().unwrap()
// Serialization discards the cache entirely...
753 impl Encodable for LazyTokenStream {
754 fn encode<S: Encoder>(&self, _: &mut S) -> Result<(), S::Error> {
// ...and deserialization restores an empty one, to be recomputed on demand.
759 impl Decodable for LazyTokenStream {
760 fn decode<D: Decoder>(_: &mut D) -> Result<LazyTokenStream, D::Error> {
761 Ok(LazyTokenStream::new())
// Hashing contributes nothing, consistent with the always-equal PartialEq.
765 impl ::std::hash::Hash for LazyTokenStream {
766 fn hash<H: ::std::hash::Hasher>(&self, _hasher: &mut H) {}
// Rebuilds a token stream for an item by synthesizing token trees for its
// outer attributes and prepending them to the item's cached `tokens`.
// Returns `None` when no cached stream exists.
// NOTE(review): this dump elides the `for attr in attrs {` loop header
// (line 780), the `continue`/`else` glue between the sugared-doc and
// path-attribute branches, the extra `parse_stream_from_source_str`
// arguments (sess/span), and several closing braces.
769 fn prepend_attrs(sess: &ParseSess,
770 attrs: &[ast::Attribute],
771 tokens: Option<&tokenstream::TokenStream>,
772 span: syntax_pos::Span)
773 -> Option<tokenstream::TokenStream>
// No cached tokens for the item itself -> nothing we can do.
775 let tokens = tokens?;
776 if attrs.len() == 0 {
777 return Some(tokens.clone())
779 let mut builder = tokenstream::TokenStreamBuilder::new();
// Cached token streams are only stored when all attributes are outer ones.
781 assert_eq!(attr.style, ast::AttrStyle::Outer,
782 "inner attributes should prevent cached tokens from existing");
// Doc comments (`/// ...` sugar): pretty-print and reparse the whole
// attribute, since there is no simple token form to synthesize.
784 if attr.is_sugared_doc {
785 let stream = parse_stream_from_source_str(
786 FileName::MacroExpansion,
787 pprust::attr_to_string(attr),
791 builder.push(stream);
795 // synthesize # [ $path $tokens ] manually here
796 let mut brackets = tokenstream::TokenStreamBuilder::new();
798 // For simple paths, push the identifier directly
799 if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
800 let ident = attr.path.segments[0].ident;
// `starts_with("r#")` recovers raw-identifier status from the source text.
801 let token = Ident(ident, ident.as_str().starts_with("r#"));
802 brackets.push(tokenstream::TokenTree::Token(ident.span, token));
804 // ... and for more complicated paths, fall back to a reparse hack that
805 // should eventually be removed.
807 let stream = parse_stream_from_source_str(
808 FileName::MacroExpansion,
809 pprust::path_to_string(&attr.path),
813 brackets.push(stream);
// Attribute arguments (e.g. `(...)` after the path) follow the path.
816 brackets.push(attr.tokens.clone());
818 let tokens = tokenstream::Delimited {
819 delim: DelimToken::Bracket,
820 tts: brackets.build().into(),
822 // The span we list here for `#` and for `[ ... ]` are both wrong in
823 // that it encompasses more than each token, but it hopefully is "good
824 // enough" for now at least.
825 builder.push(tokenstream::TokenTree::Token(attr.span, Pound));
826 let delim_span = DelimSpan::from_single(attr.span);
827 builder.push(tokenstream::TokenTree::Delimited(delim_span, tokens));
// Finally append the item's own cached tokens after all attributes.
829 builder.push(tokens.clone());
830 Some(builder.build())