1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 pub use self::BinOpToken::*;
12 pub use self::Nonterminal::*;
13 pub use self::DelimToken::*;
15 pub use self::Token::*;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use syntax::parse::parse_stream_from_source_str;
24 use syntax_pos::{self, Span, FileName};
25 use tokenstream::{TokenStream, TokenTree};
30 use rustc_data_structures::sync::{Lrc, Lock};
// NOTE(review): fragmentary extract — the original line numbers (left column)
// skip, so the enum headers, variant declarations, and closing braces between
// these lines are missing from this view.
32 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
// Derive block for the delimiter-token enum; only the variants' doc comments
// survive in this extract, the variant lines themselves are absent.
47 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
49 /// A round parenthesis: `(` or `)`
51 /// A square bracket: `[` or `]`
53 /// A curly brace: `{` or `}`
55 /// An empty delimiter
// Character width of one side of this delimiter: 0 for the implicit
// `NoDelim` case, 1 for any real bracket character.
60 pub fn len(self) -> usize {
61 if self == NoDelim { 0 } else { 1 }
// True when the delimiter occupies no source characters; body not shown in
// this extract — presumably `self == NoDelim`, TODO confirm against the file.
64 pub fn is_empty(self) -> bool {
// Literal-token kind. Fragmentary extract: only the raw-string variants and
// parts of the impl are visible here.
69 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
76 StrRaw(ast::Name, u16), /* raw str delimited by n hash symbols */
78 ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */
// Human-readable noun for this literal kind, used in diagnostics
// (e.g. "expected integer").
82 pub fn short_name(&self) -> &'static str {
86 Integer(_) => "integer",
88 Str_(_) | StrRaw(..) => "string",
89 ByteStr(_) | ByteStrRaw(..) => "byte string"
93 // See comments in `interpolated_to_tokenstream` for why we care about
94 // *probably* equal here rather than actual equality
// Compares only the enum discriminants — payloads (the interned names /
// hash counts) are deliberately ignored.
95 fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
96 mem::discriminant(self) == mem::discriminant(other)
// Returns `true` if an `Ident` token with this name/rawness could start an
// expression: any non-reserved identifier, any path-segment keyword, or one
// of the explicitly listed expression-leading keywords below.
100 pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
101 let ident_token: Token = Ident(ident, is_raw);
103 !ident_token.is_reserved_ident() ||
104 ident_token.is_path_segment_keyword() ||
// Keywords that may legally begin an expression (`box x`, `break`, `loop`,
// literal `true`/`false`, etc.). NOTE(review): extract skips original lines
// 105-106 and 112, so the array opener and at least one entry are missing.
107 keywords::Box.name(),
108 keywords::Break.name(),
109 keywords::Continue.name(),
110 keywords::False.name(),
111 keywords::For.name(),
113 keywords::Loop.name(),
114 keywords::Match.name(),
115 keywords::Move.name(),
116 keywords::Return.name(),
117 keywords::True.name(),
118 keywords::Unsafe.name(),
119 keywords::While.name(),
120 keywords::Yield.name(),
121 keywords::Static.name(),
122 ].contains(&ident.name)
// Returns `true` if an `Ident` token with this name/rawness could start a
// type: any non-reserved identifier, any path-segment keyword, or one of the
// type-leading keywords listed below (`for<'a> ...`, `impl Trait`, etc.).
125 fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
126 let ident_token: Token = Ident(ident, is_raw);
128 !ident_token.is_reserved_ident() ||
129 ident_token.is_path_segment_keyword() ||
// NOTE(review): the array opener line (original 130) is missing from this
// extract; the entries below feed the `.contains` check.
131 keywords::Underscore.name(),
132 keywords::For.name(),
133 keywords::Impl.name(),
135 keywords::Unsafe.name(),
136 keywords::Extern.name(),
137 keywords::Typeof.name(),
138 ].contains(&ident.name)
// True if the identifier is one of the keywords allowed to appear as a path
// segment (`super`, `self`, `Self`, `extern`, `crate`, `::crate`, `$crate`).
141 pub fn is_path_segment_keyword(id: ast::Ident) -> bool {
142 id.name == keywords::Super.name() ||
143 id.name == keywords::SelfValue.name() ||
144 id.name == keywords::SelfType.name() ||
145 id.name == keywords::Extern.name() ||
146 id.name == keywords::Crate.name() ||
147 id.name == keywords::CrateRoot.name() ||
148 id.name == keywords::DollarCrate.name()
151 // We see this identifier in a normal identifier position, like variable name or a type.
152 // How was it written originally? Did it use the raw form? Let's try to guess.
// Heuristic: a reserved identifier in a plain-ident position must have been
// written with `r#` — except path-segment keywords, which are legal bare.
153 pub fn is_raw_guess(ident: ast::Ident) -> bool {
154 ident.name != keywords::Invalid.name() &&
155 is_reserved_ident(ident) && !is_path_segment_keyword(ident)
158 // Returns true for reserved identifiers used internally for elided lifetimes,
159 // unnamed method parameters, crate root module, error recovery etc.
// The `<=` / `>=` range checks below rely on the keyword symbols being
// interned first and in a fixed, contiguous order.
160 pub fn is_special_ident(id: ast::Ident) -> bool {
161 id.name <= keywords::Underscore.name()
164 /// Returns `true` if the token is a keyword used in the language.
165 pub fn is_used_keyword(id: ast::Ident) -> bool {
166 id.name >= keywords::As.name() && id.name <= keywords::While.name()
169 /// Returns `true` if the token is a keyword reserved for possible future use.
170 pub fn is_unused_keyword(id: ast::Ident) -> bool {
171 id.name >= keywords::Abstract.name() && id.name <= keywords::Yield.name()
174 /// Returns `true` if the token is either a special identifier or a keyword.
175 pub fn is_reserved_ident(id: ast::Ident) -> bool {
176 is_special_ident(id) || is_used_keyword(id) || is_unused_keyword(id)
// The `Token` enum. NOTE(review): fragmentary extract — the `pub enum Token`
// header and most operator/structural variants (original lines 180-212,
// 217-218, etc.) are missing from this view.
179 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug)]
181 /* Expression-operator symbols. */
196 /* Structural symbols */
202 DotEq, // HACK(durka42) never produced by the parser, only used for libproc_macro
213 /// An opening delimiter, eg. `{`
214 OpenDelim(DelimToken),
215 /// A closing delimiter, eg. `}`
216 CloseDelim(DelimToken),
// Literal token plus an optional name — presumably the literal's suffix
// (e.g. the `u8` in `1u8`); TODO confirm, the extract does not show it used.
219 Literal(Lit, Option<ast::Name>),
221 /* Name components */
222 Ident(ast::Ident, /* is_raw */ bool),
223 Lifetime(ast::Ident),
225 // The `LazyTokenStream` is a pure function of the `Nonterminal`,
226 // and so the `LazyTokenStream` can be ignored by Eq, Hash, etc.
227 Interpolated(Lrc<(Nonterminal, LazyTokenStream)>),
228 // Can be expanded into several tokens.
230 DocComment(ast::Name),
232 // Junk. These carry no data because we don't really care about the data
233 // they *would* carry, and don't really want to allocate a new ident for
234 // them. Instead, users could extract that from the associated span.
// Wraps a `Nonterminal` in an `Interpolated` token, pairing it with an empty
// `LazyTokenStream` cache (filled on demand in `interpolated_to_tokenstream`).
246 pub fn interpolated(nt: Nonterminal) -> Token {
247 Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))
250 /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
251 pub fn from_ast_ident(ident: ast::Ident) -> Token {
252 Ident(ident, is_raw_guess(ident))
255 /// Returns `true` if the token starts with '>'.
// Used by the parser when splitting `>>`/`>=`/`>>=` in generic-argument
// positions; body's non-matching arm is missing from this extract.
256 pub fn is_like_gt(&self) -> bool {
258 BinOp(Shr) | BinOpEq(Shr) | Gt | Ge => true,
263 /// Returns `true` if the token can appear at the start of an expression.
264 pub fn can_begin_expr(&self) -> bool {
266 Ident(ident, is_raw) =>
267 ident_can_begin_expr(ident, is_raw), // value name or keyword
268 OpenDelim(..) | // tuple, array or block
269 Literal(..) | // literal
270 Not | // operator not
271 BinOp(Minus) | // unary minus
272 BinOp(Star) | // dereference
273 BinOp(Or) | OrOr | // closure
274 BinOp(And) | // reference
275 AndAnd | // double reference
276 // DotDotDot is no longer supported, but we need some way to display the error
277 DotDot | DotDotDot | DotDotEq | // range notation
278 Lt | BinOp(Shl) | // associated path
279 ModSep | // global path
280 Lifetime(..) | // labeled loop
281 Pound => true, // expression attributes
// Interpolated nonterminals count as expression starts only for the
// AST kinds that can themselves begin an expression.
282 Interpolated(ref nt) => match nt.0 {
283 NtIdent(..) | NtExpr(..) | NtBlock(..) | NtPath(..) | NtLifetime(..) => true,
290 /// Returns `true` if the token can appear at the start of a type.
291 pub fn can_begin_type(&self) -> bool {
293 Ident(ident, is_raw) =>
294 ident_can_begin_type(ident, is_raw), // type name or keyword
295 OpenDelim(Paren) | // tuple
296 OpenDelim(Bracket) | // array
298 BinOp(Star) | // raw pointer
299 BinOp(And) | // reference
300 AndAnd | // double reference
301 Question | // maybe bound in trait object
302 Lifetime(..) | // lifetime bound in trait object
303 Lt | BinOp(Shl) | // associated path
304 ModSep => true, // global path
305 Interpolated(ref nt) => match nt.0 {
306 NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
313 /// Returns `true` if the token can appear at the start of a generic bound.
// Bounds start with a path (`Trait`), a lifetime (`'a`), HRTB `for<...>`,
// `?` (e.g. `?Sized`), or a parenthesized bound.
314 pub fn can_begin_bound(&self) -> bool {
315 self.is_path_start() || self.is_lifetime() || self.is_keyword(keywords::For) ||
316 self == &Question || self == &OpenDelim(Paren)
319 /// Returns `true` if the token is any literal
// Body's match arms are missing from this extract.
320 pub fn is_lit(&self) -> bool {
327 /// Returns an identifier if this token is an identifier.
// Looks through `Interpolated(NtIdent)` as well as plain `Ident` tokens.
328 pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
330 Ident(ident, is_raw) => Some((ident, is_raw)),
331 Interpolated(ref nt) => match nt.0 {
332 NtIdent(ident, is_raw) => Some((ident, is_raw)),
338 /// Returns a lifetime identifier if this token is a lifetime.
// Mirrors `ident()`: also looks through `Interpolated(NtLifetime)`.
339 pub fn lifetime(&self) -> Option<ast::Ident> {
341 Lifetime(ident) => Some(ident),
342 Interpolated(ref nt) => match nt.0 {
343 NtLifetime(ident) => Some(ident),
349 /// Returns `true` if the token is an identifier.
350 pub fn is_ident(&self) -> bool {
351 self.ident().is_some()
353 /// Returns `true` if the token is a lifetime.
354 pub fn is_lifetime(&self) -> bool {
355 self.lifetime().is_some()
358 /// Returns `true` if the token is an identifier whose name is the given
360 pub fn is_ident_named(&self, name: &str) -> bool {
362 Some((ident, _)) => ident.name.as_str() == name,
367 /// Returns `true` if the token is a documentation comment.
368 pub fn is_doc_comment(&self) -> bool {
370 DocComment(..) => true,
375 /// Returns `true` if the token is interpolated.
376 pub fn is_interpolated(&self) -> bool {
378 Interpolated(..) => true,
383 /// Returns `true` if the token is an interpolated path.
384 pub fn is_path(&self) -> bool {
385 if let Interpolated(ref nt) = *self {
386 if let NtPath(..) = nt.0 {
393 /// Returns `true` if the token is either the `mut` or `const` keyword.
394 pub fn is_mutability(&self) -> bool {
395 self.is_keyword(keywords::Mut) ||
396 self.is_keyword(keywords::Const)
// `<` or `<<` can start a qualified path (`<T as Trait>::...`).
399 pub fn is_qpath_start(&self) -> bool {
400 self == &Lt || self == &BinOp(Shl)
// Anything that can start a path: `::`, a qpath opener, an interpolated
// path, a path-segment keyword, or an ordinary (non-reserved) identifier.
403 pub fn is_path_start(&self) -> bool {
404 self == &ModSep || self.is_qpath_start() || self.is_path() ||
405 self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
408 /// Returns `true` if the token is a given keyword, `kw`.
// Raw identifiers (`r#match`) never count as keywords, hence `!is_raw`.
409 pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
410 self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false)
// Token-level wrappers over the free functions above; each delegates only
// for non-raw identifiers (`Some((id, false))`). The fallback arms are
// missing from this extract.
413 pub fn is_path_segment_keyword(&self) -> bool {
415 Some((id, false)) => is_path_segment_keyword(id),
420 // Returns true for reserved identifiers used internally for elided lifetimes,
421 // unnamed method parameters, crate root module, error recovery etc.
422 pub fn is_special_ident(&self) -> bool {
424 Some((id, false)) => is_special_ident(id),
429 /// Returns `true` if the token is a keyword used in the language.
430 pub fn is_used_keyword(&self) -> bool {
432 Some((id, false)) => is_used_keyword(id),
437 /// Returns `true` if the token is a keyword reserved for possible future use.
438 pub fn is_unused_keyword(&self) -> bool {
440 Some((id, false)) => is_unused_keyword(id),
445 /// Returns `true` if the token is either a special identifier or a keyword.
446 pub fn is_reserved_ident(&self) -> bool {
448 Some((id, false)) => is_reserved_ident(id),
// Attempts to fuse `self` with an immediately-following `joint` token into a
// single multi-character token (e.g. `&` + `&` -> `&&`, `-` + `>` -> `->`).
// Returns `None` when the pair does not form a compound token. Many match
// arms are missing from this extract (original lines 454-499 are sparse).
453 pub fn glue(self, joint: Token) -> Option<Token> {
464 BinOp(Minus) => LArrow,
477 BinOp(op) => match joint {
479 BinOp(And) if op == And => AndAnd,
480 BinOp(Or) if op == Or => OrOr,
481 Gt if op == Minus => RArrow,
490 DotDot => match joint {
495 Colon => match joint {
// Tokens that never glue with anything: already-compound operators and
// delimiters...
500 Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot | DotEq |
501 DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
502 Question | OpenDelim(..) | CloseDelim(..) => return None,
// ...and payload-carrying / trivia tokens.
504 Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
505 Whitespace | Comment | Shebang(..) | Eof => return None,
509 /// Returns tokens that are likely to be typed accidentally instead of the current token.
510 /// Enables better error recovery when the wrong token is found.
511 pub fn similar_tokens(&self) -> Option<Vec<Token>> {
513 Comma => Some(vec![Dot, Lt]),
514 Semi => Some(vec![Colon]),
// Converts an `Interpolated` token back into a concrete `TokenStream`.
// Panics on any other token kind. Prefers a cached lossless stream attached
// to the AST node; falls back to pretty-print + reparse (losing spans).
519 pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span)
522 let nt = match *self {
523 Token::Interpolated(ref nt) => nt,
524 _ => panic!("only works on interpolated tokens"),
527 // An `Interpolated` token means that we have a `Nonterminal`
528 // which is often a parsed AST item. At this point we now need
529 // to convert the parsed AST to an actual token stream, e.g.
530 // un-parse it basically.
532 // Unfortunately there's not really a great way to do that in a
533 // guaranteed lossless fashion right now. The fallback here is
534 // to just stringify the AST node and reparse it, but this loses
535 // all span information.
537 // As a result, some AST nodes are annotated with the token
538 // stream they came from. Here we attempt to extract these
539 // lossless token streams before we fall back to the
541 let mut tokens = None;
// Items carry their attributes separately, so the cached stream must be
// re-prefixed with the (outer) attribute tokens.
544 Nonterminal::NtItem(ref item) => {
545 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
547 Nonterminal::NtTraitItem(ref item) => {
548 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
550 Nonterminal::NtImplItem(ref item) => {
551 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
// Single-token nonterminals can be converted losslessly and directly.
553 Nonterminal::NtIdent(ident, is_raw) => {
554 let token = Token::Ident(ident, is_raw);
555 tokens = Some(TokenTree::Token(ident.span, token).into());
557 Nonterminal::NtLifetime(ident) => {
558 let token = Token::Lifetime(ident);
559 tokens = Some(TokenTree::Token(ident.span, token).into());
561 Nonterminal::NtTT(ref tt) => {
562 tokens = Some(tt.clone().into());
// The lossy fallback is computed (and cached in the `LazyTokenStream`)
// unconditionally, so it can be compared against the lossless stream.
567 let tokens_for_real = nt.1.force(|| {
568 // FIXME(#43081): Avoid this pretty-print + reparse hack
569 let source = pprust::token_to_string(self);
570 parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
573 // During early phases of the compiler the AST could get modified
574 // directly (e.g. attributes added or removed) and the internal cache
575 // of tokens may not be invalidated or updated. Consequently if the
576 // "lossless" token stream disagrees with our actual stringification
577 // (which has historically been much more battle-tested) then we go
578 // with the lossy stream anyway (losing span information).
580 // Note that the comparison isn't `==` here to avoid comparing spans,
581 // but it *also* is a "probable" equality which is a pretty weird
582 // definition. We mostly want to catch actual changes to the AST
583 // like a `#[cfg]` being processed or some weird `macro_rules!`
586 // What we *don't* want to catch is the fact that a user-defined
587 // literal like `0xf` is stringified as `15`, causing the cached token
588 // stream to not be literal `==` token-wise (ignoring spans) to the
589 // token stream we got from stringification.
591 // Instead the "probably equal" check here is "does each token
592 // recursively have the same discriminant?" We basically don't look at
593 // the token values here and assume that such fine grained modifications
594 // of token streams doesn't happen.
595 if let Some(tokens) = tokens {
596 if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
600 return tokens_for_real
603 // See comments in `interpolated_to_tokenstream` for why we care about
604 // *probably* equal here rather than actual equality
// Structural comparison that ignores spans: discriminants first, then
// payload-by-payload for the data-carrying variants.
605 pub fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
606 if mem::discriminant(self) != mem::discriminant(other) {
609 match (self, other) {
// Unit variants: same discriminant already implies equality.
// (Many sibling arms from original lines 610-640 are missing here.)
624 (&DotDotDot, &DotDotDot) |
625 (&DotDotEq, &DotDotEq) |
633 (&FatArrow, &FatArrow) |
636 (&Question, &Question) |
637 (&Whitespace, &Whitespace) |
638 (&Comment, &Comment) |
639 (&Eof, &Eof) => true,
641 (&BinOp(a), &BinOp(b)) |
642 (&BinOpEq(a), &BinOpEq(b)) => a == b,
644 (&OpenDelim(a), &OpenDelim(b)) |
645 (&CloseDelim(a), &CloseDelim(b)) => a == b,
647 (&DocComment(a), &DocComment(b)) |
648 (&Shebang(a), &Shebang(b)) => a == b,
// Identifiers/lifetimes compare by interned name only, not by span.
650 (&Lifetime(a), &Lifetime(b)) => a.name == b.name,
651 (&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d,
653 (&Literal(ref a, b), &Literal(ref c, d)) => {
654 b == d && a.probably_equal_for_proc_macro(c)
// Interpolated payloads are never considered equal — comparing the
// underlying AST reliably is not possible here.
657 (&Interpolated(_), &Interpolated(_)) => false,
// Reaching this arm means a variant was added without updating the match.
659 _ => panic!("forgot to add a token?"),
// NOTE(review): `PartialEq` is implemented manually below, which is why it
// is absent from this derive list. Several variants (NtStmt, NtPat, NtTy,
// NtPath, NtTT, NtArm, NtArg — visible in the Debug impl) are missing from
// this fragmentary extract.
664 #[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
665 /// For interpolation during macro expansion.
666 pub enum Nonterminal {
667 NtItem(P<ast::Item>),
668 NtBlock(P<ast::Block>),
671 NtExpr(P<ast::Expr>),
673 NtIdent(ast::Ident, /* is_raw */ bool),
674 NtLifetime(ast::Ident),
675 /// Stuff inside brackets for attributes
676 NtMeta(ast::MetaItem),
678 NtVis(ast::Visibility),
680 // These are not exposed to macros, but are used by quasiquote.
682 NtImplItem(ast::ImplItem),
683 NtTraitItem(ast::TraitItem),
684 NtForeignItem(ast::ForeignItem),
685 NtGenerics(ast::Generics),
686 NtWhereClause(ast::WhereClause),
// Manual equality: only the "simple" nonterminals (ident, lifetime, token
// tree) are comparable; everything else falls through to the FIXME'd
// catch-all (missing from this extract) and compares unequal.
690 impl PartialEq for Nonterminal {
691 fn eq(&self, rhs: &Self) -> bool {
693 (NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
694 ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
695 (NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
696 (NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
697 // FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
698 // correctly based on data from AST. This will prevent them from matching each other
699 // in macros. The comparison will become possible only when each nonterminal has an
700 // attached token stream from which it was parsed.
// Opaque Debug output: prints only the variant name, never the (potentially
// huge) AST payload.
706 impl fmt::Debug for Nonterminal {
707 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
709 NtItem(..) => f.pad("NtItem(..)"),
710 NtBlock(..) => f.pad("NtBlock(..)"),
711 NtStmt(..) => f.pad("NtStmt(..)"),
712 NtPat(..) => f.pad("NtPat(..)"),
713 NtExpr(..) => f.pad("NtExpr(..)"),
714 NtTy(..) => f.pad("NtTy(..)"),
715 NtIdent(..) => f.pad("NtIdent(..)"),
716 NtMeta(..) => f.pad("NtMeta(..)"),
717 NtPath(..) => f.pad("NtPath(..)"),
718 NtTT(..) => f.pad("NtTT(..)"),
719 NtArm(..) => f.pad("NtArm(..)"),
720 NtImplItem(..) => f.pad("NtImplItem(..)"),
721 NtTraitItem(..) => f.pad("NtTraitItem(..)"),
722 NtForeignItem(..) => f.pad("NtForeignItem(..)"),
723 NtGenerics(..) => f.pad("NtGenerics(..)"),
724 NtWhereClause(..) => f.pad("NtWhereClause(..)"),
725 NtArg(..) => f.pad("NtArg(..)"),
726 NtVis(..) => f.pad("NtVis(..)"),
727 NtLifetime(..) => f.pad("NtLifetime(..)"),
// True when the token is an operator/punctuation token. Implemented by
// exclusion: anything that is a delimiter, carries a payload, or is trivia
// is *not* an op. The `_ => true` fallback arm is missing from this extract.
732 pub fn is_op(tok: &Token) -> bool {
734 OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
735 Ident(..) | Lifetime(..) | Interpolated(..) |
736 Whitespace | Comment | Shebang(..) | Eof => false,
// A once-computed, thread-safe cache of a `TokenStream` (see `force`).
// Deliberately inert for Eq/Hash/Encodable: two caches always compare equal,
// hash to nothing, and (de)serialize as empty — the stream is recomputable.
742 pub struct LazyTokenStream(Lock<Option<TokenStream>>);
744 impl cmp::Eq for LazyTokenStream {}
745 impl PartialEq for LazyTokenStream {
// Always-equal stub; body (returning true) is missing from this extract.
746 fn eq(&self, _other: &LazyTokenStream) -> bool {
751 impl fmt::Debug for LazyTokenStream {
752 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Debug-prints the cached Option<TokenStream>, cloning to get ownership
// of the lock's contents.
753 fmt::Debug::fmt(&self.clone().0.into_inner(), f)
757 impl LazyTokenStream {
758 pub fn new() -> Self {
759 LazyTokenStream(Lock::new(None))
// Returns the cached stream, computing and storing it via `f` on first use.
// The lock is held across `f()`, so `f` must not re-enter this cache.
762 pub fn force<F: FnOnce() -> TokenStream>(&self, f: F) -> TokenStream {
763 let mut opt_stream = self.0.lock();
764 if opt_stream.is_none() {
765 *opt_stream = Some(f());
767 opt_stream.clone().unwrap()
// Serialization intentionally drops the cache; decoding yields an empty one.
771 impl Encodable for LazyTokenStream {
772 fn encode<S: Encoder>(&self, _: &mut S) -> Result<(), S::Error> {
777 impl Decodable for LazyTokenStream {
778 fn decode<D: Decoder>(_: &mut D) -> Result<LazyTokenStream, D::Error> {
779 Ok(LazyTokenStream::new())
// Hashing contributes nothing, matching the always-equal PartialEq.
783 impl ::std::hash::Hash for LazyTokenStream {
784 fn hash<H: ::std::hash::Hasher>(&self, _hasher: &mut H) {}
// Builds `attrs` tokens + `tokens` as one stream. Returns `None` when there
// is no cached stream to prepend onto. Each attribute is pretty-printed and
// reparsed (same FIXME hack as in `interpolated_to_tokenstream`). The loop
// header over `attrs` (original line 798) is missing from this extract.
787 fn prepend_attrs(sess: &ParseSess,
788 attrs: &[ast::Attribute],
789 tokens: Option<&tokenstream::TokenStream>,
790 span: syntax_pos::Span)
791 -> Option<tokenstream::TokenStream>
793 let tokens = tokens?;
// Fast path: nothing to prepend.
794 if attrs.len() == 0 {
795 return Some(tokens.clone())
797 let mut builder = tokenstream::TokenStreamBuilder::new();
// Inner attributes would have invalidated the cached tokens upstream, so
// only outer attributes can appear here.
799 assert_eq!(attr.style, ast::AttrStyle::Outer,
800 "inner attributes should prevent cached tokens from existing");
801 // FIXME: Avoid this pretty-print + reparse hack as above
802 let name = FileName::MacroExpansion;
803 let source = pprust::attr_to_string(attr);
804 let stream = parse_stream_from_source_str(name, source, sess, Some(span));
805 builder.push(stream);
807 builder.push(tokens.clone());
808 Some(builder.build())