1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 pub use self::BinOpToken::*;
12 pub use self::Nonterminal::*;
13 pub use self::DelimToken::*;
14 pub use self::IdentStyle::*;
16 pub use self::Token::*;
21 use util::interner::{RcStr, StrInterner};
24 use serialize::{Decodable, Decoder, Encodable, Encoder};
29 use std::path::BytesContainer;
32 #[allow(non_camel_case_types)]
33 #[deriving(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Show, Copy)]
48 #[deriving(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Show, Copy)]
50 /// A round parenthesis: `(` or `)`
52 /// A square bracket: `[` or `]`
54 /// A curly brace: `{` or `}`
58 #[deriving(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Show, Copy)]
60 /// `::` follows the identifier with no whitespace in-between.
65 #[deriving(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Show, Copy)]
72 StrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */
74 BinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */
78 pub fn short_name(&self) -> &'static str {
82 Integer(_) => "integer",
84 Str_(_) | StrRaw(..) => "str",
85 Binary(_) | BinaryRaw(..) => "binary str"
90 #[allow(non_camel_case_types)]
91 #[deriving(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Show)]
93 /* Expression-operator symbols. */
108 /* Structural symbols */
123 /// An opening delimiter, e.g. `{`
124 OpenDelim(DelimToken),
125 /// A closing delimiter, e.g. `}`
126 CloseDelim(DelimToken),
129 Literal(Lit, Option<ast::Name>),
131 /* Name components */
132 Ident(ast::Ident, IdentStyle),
134 Lifetime(ast::Ident),
136 /* For interpolation */
137 Interpolated(Nonterminal),
138 // Can be expanded into several tokens.
140 DocComment(ast::Name),
141 // In left-hand-sides of MBE macros:
142 /// Parse a nonterminal (name to bind, name of NT, styles of their idents)
143 MatchNt(ast::Ident, ast::Ident, IdentStyle, IdentStyle),
144 // In right-hand-sides of MBE macros:
145 /// A syntactic variable that will be filled in by macro expansion.
146 SubstNt(ast::Ident, IdentStyle),
148 // Junk. These carry no data because we don't really care about the data
149 // they *would* carry, and don't really want to allocate a new ident for
150 // them. Instead, users could extract that from the associated span.
162 /// Returns `true` if the token can appear at the start of an expression.
163 pub fn can_begin_expr(&self) -> bool {
165 OpenDelim(_) => true,
169 Literal(_, _) => true,
173 BinOp(Minus) => true,
176 BinOp(Or) => true, // in lambda syntax
177 OrOr => true, // in lambda syntax
179 Interpolated(NtExpr(..)) => true,
180 Interpolated(NtIdent(..)) => true,
181 Interpolated(NtBlock(..)) => true,
182 Interpolated(NtPath(..)) => true,
187 /// Returns `true` if the token is any literal
188 pub fn is_lit(&self) -> bool {
190 Literal(_, _) => true,
195 /// Returns `true` if the token is an identifier.
196 pub fn is_ident(&self) -> bool {
203 /// Returns `true` if the token is an interpolated path.
204 pub fn is_path(&self) -> bool {
206 Interpolated(NtPath(..)) => true,
211 /// Returns `true` if the token is an identifier that is not followed by a `::`
213 #[allow(non_upper_case_globals)]
214 pub fn is_plain_ident(&self) -> bool {
216 Ident(_, Plain) => true,
221 /// Returns `true` if the token is a lifetime.
222 pub fn is_lifetime(&self) -> bool {
224 Lifetime(..) => true,
229 /// Returns `true` if the token is either the `mut` or `const` keyword.
230 pub fn is_mutability(&self) -> bool {
231 self.is_keyword(keywords::Mut) ||
232 self.is_keyword(keywords::Const)
235 /// Maps a token to its corresponding binary operator.
236 pub fn to_binop(&self) -> Option<ast::BinOp> {
238 BinOp(Star) => Some(ast::BiMul),
239 BinOp(Slash) => Some(ast::BiDiv),
240 BinOp(Percent) => Some(ast::BiRem),
241 BinOp(Plus) => Some(ast::BiAdd),
242 BinOp(Minus) => Some(ast::BiSub),
243 BinOp(Shl) => Some(ast::BiShl),
244 BinOp(Shr) => Some(ast::BiShr),
245 BinOp(And) => Some(ast::BiBitAnd),
246 BinOp(Caret) => Some(ast::BiBitXor),
247 BinOp(Or) => Some(ast::BiBitOr),
248 Lt => Some(ast::BiLt),
249 Le => Some(ast::BiLe),
250 Ge => Some(ast::BiGe),
251 Gt => Some(ast::BiGt),
252 EqEq => Some(ast::BiEq),
253 Ne => Some(ast::BiNe),
254 AndAnd => Some(ast::BiAnd),
255 OrOr => Some(ast::BiOr),
260 /// Returns `true` if the token is a given keyword, `kw`.
261 #[allow(non_upper_case_globals)]
262 pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
264 Ident(sid, Plain) => kw.to_name() == sid.name,
269 /// Returns `true` if the token is either a special identifier, or a strict
270 /// or reserved keyword.
271 #[allow(non_upper_case_globals)]
272 pub fn is_any_keyword(&self) -> bool {
274 Ident(sid, Plain) => {
277 n == SELF_KEYWORD_NAME
278 || n == STATIC_KEYWORD_NAME
279 || n == SUPER_KEYWORD_NAME
280 || STRICT_KEYWORD_START <= n
281 && n <= RESERVED_KEYWORD_FINAL
287 /// Returns `true` if the token may not appear as an identifier.
288 #[allow(non_upper_case_globals)]
289 pub fn is_strict_keyword(&self) -> bool {
291 Ident(sid, Plain) => {
294 n == SELF_KEYWORD_NAME
295 || n == STATIC_KEYWORD_NAME
296 || n == SUPER_KEYWORD_NAME
297 || STRICT_KEYWORD_START <= n
298 && n <= STRICT_KEYWORD_FINAL
300 Ident(sid, ModName) => {
303 n != SELF_KEYWORD_NAME
304 && n != SUPER_KEYWORD_NAME
305 && STRICT_KEYWORD_START <= n
306 && n <= STRICT_KEYWORD_FINAL
312 /// Returns `true` if the token is a keyword that has been reserved for
313 /// possible future use.
314 #[allow(non_upper_case_globals)]
315 pub fn is_reserved_keyword(&self) -> bool {
317 Ident(sid, Plain) => {
320 RESERVED_KEYWORD_START <= n
321 && n <= RESERVED_KEYWORD_FINAL
327 /// Hygienic identifier equality comparison.
329 /// See `syntax::ext::mtwt`.
330 pub fn mtwt_eq(&self, other : &Token) -> bool {
331 match (self, other) {
332 (&Ident(id1,_), &Ident(id2,_)) | (&Lifetime(id1), &Lifetime(id2)) =>
333 mtwt::resolve(id1) == mtwt::resolve(id2),
339 #[deriving(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash)]
340 /// For interpolation during macro expansion.
341 pub enum Nonterminal {
342 NtItem(P<ast::Item>),
343 NtBlock(P<ast::Block>),
344 NtStmt(P<ast::Stmt>),
346 NtExpr(P<ast::Expr>),
348 NtIdent(Box<ast::Ident>, IdentStyle),
349 /// Stuff inside brackets for attributes
350 NtMeta(P<ast::MetaItem>),
351 NtPath(Box<ast::Path>),
352 NtTT(P<ast::TokenTree>), // needs P'ed to break a circularity
355 impl fmt::Show for Nonterminal {
356 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
358 NtItem(..) => f.pad("NtItem(..)"),
359 NtBlock(..) => f.pad("NtBlock(..)"),
360 NtStmt(..) => f.pad("NtStmt(..)"),
361 NtPat(..) => f.pad("NtPat(..)"),
362 NtExpr(..) => f.pad("NtExpr(..)"),
363 NtTy(..) => f.pad("NtTy(..)"),
364 NtIdent(..) => f.pad("NtIdent(..)"),
365 NtMeta(..) => f.pad("NtMeta(..)"),
366 NtPath(..) => f.pad("NtPath(..)"),
367 NtTT(..) => f.pad("NtTT(..)"),
372 // Get the first "argument"
374 ( $first:expr, $( $remainder:expr, )* ) => ( $first )
377 // Get the last "argument" (has to be done recursively to avoid phoney local ambiguity error)
379 ( $first:expr, $( $remainder:expr, )+ ) => ( last!( $( $remainder, )+ ) );
380 ( $first:expr, ) => ( $first )
383 // In this macro, there is the requirement that the name (the number) must be monotonically
384 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
385 // except starting from the next number instead of zero, and with the additional exception that
386 // special identifiers are *also* allowed (they are deduplicated in the important place, the
387 // interner), an exception which is demonstrated by "static" and "self".
388 macro_rules! declare_special_idents_and_keywords {(
389 // So now, in these rules, why is each definition parenthesised?
390 // Answer: otherwise we get a spurious local ambiguity bug on the "}"
391 pub mod special_idents {
392 $( ($si_name:expr, $si_static:ident, $si_str:expr); )*
397 $( ($sk_name:expr, $sk_variant:ident, $sk_str:expr); )*
399 $( ($rk_name:expr, $rk_variant:ident, $rk_str:expr); )*
402 static STRICT_KEYWORD_START: ast::Name = first!($( ast::Name($sk_name), )*);
403 static STRICT_KEYWORD_FINAL: ast::Name = last!($( ast::Name($sk_name), )*);
404 static RESERVED_KEYWORD_START: ast::Name = first!($( ast::Name($rk_name), )*);
405 static RESERVED_KEYWORD_FINAL: ast::Name = last!($( ast::Name($rk_name), )*);
407 pub mod special_idents {
410 #[allow(non_upper_case_globals)]
411 pub const $si_static: ast::Ident = ast::Ident {
412 name: ast::Name($si_name),
418 pub mod special_names {
421 #[allow(non_upper_case_globals)]
422 pub const $si_static: ast::Name = ast::Name($si_name);
426 /// All the valid words that have meaning in the Rust language.
428 /// Rust keywords are either 'strict' or 'reserved'. Strict keywords may not
429 /// appear as identifiers at all. Reserved keywords are not used anywhere in
430 /// the language and may not appear as identifiers.
432 pub use self::Keyword::*;
442 pub fn to_name(&self) -> ast::Name {
444 $( $sk_variant => ast::Name($sk_name), )*
445 $( $rk_variant => ast::Name($rk_name), )*
451 fn mk_fresh_ident_interner() -> IdentInterner {
452 // The indices here must correspond to the numbers in
453 // special_idents, in Keyword to_name(), and in static
455 let mut init_vec = Vec::new();
456 $(init_vec.push($si_str);)*
457 $(init_vec.push($sk_str);)*
458 $(init_vec.push($rk_str);)*
459 interner::StrInterner::prefill(init_vec[])
463 // If the special idents get renumbered, remember to modify these constants as appropriate
464 pub const SELF_KEYWORD_NAME: ast::Name = ast::Name(SELF_KEYWORD_NAME_NUM);
465 const STATIC_KEYWORD_NAME: ast::Name = ast::Name(STATIC_KEYWORD_NAME_NUM);
466 const SUPER_KEYWORD_NAME: ast::Name = ast::Name(SUPER_KEYWORD_NAME_NUM);
468 pub const SELF_KEYWORD_NAME_NUM: u32 = 1;
469 const STATIC_KEYWORD_NAME_NUM: u32 = 2;
470 const SUPER_KEYWORD_NAME_NUM: u32 = 3;
472 // NB: leaving holes in the ident table is bad! a different ident will get
473 // interned with the id from the hole, but it will be between the min and max
474 // of the reserved words, and thus tagged as "reserved".
476 declare_special_idents_and_keywords! {
477 pub mod special_idents {
478 // These ones are statics
480 (super::SELF_KEYWORD_NAME_NUM, self_, "self");
481 (super::STATIC_KEYWORD_NAME_NUM, statik, "static");
482 (super::SUPER_KEYWORD_NAME_NUM, super_, "super");
483 (4, static_lifetime, "'static");
487 (6, matchers, "matchers");
489 // outside of libsyntax
490 (7, clownshoe_abi, "__rust_abi");
491 (8, opaque, "<opaque>");
492 (9, unnamed_field, "<unnamed_field>");
493 (10, type_self, "Self");
494 (11, prelude_import, "prelude_import");
498 // These ones are variants of the Keyword enum
502 (13, Break, "break");
503 (14, Crate, "crate");
506 (17, Extern, "extern");
507 (18, False, "false");
515 (26, Match, "match");
521 (32, Return, "return");
522 // Static, Self and Super are also special idents (prefill de-dupes)
523 (super::STATIC_KEYWORD_NAME_NUM, Static, "static");
524 (super::SELF_KEYWORD_NAME_NUM, Self, "self");
525 (33, Struct, "struct");
526 (super::SUPER_KEYWORD_NAME_NUM, Super, "super");
528 (35, Trait, "trait");
530 (37, Unsafe, "unsafe");
532 (39, Virtual, "virtual");
533 (40, While, "while");
534 (41, Continue, "continue");
537 (44, Const, "const");
538 (45, Where, "where");
541 (46, Alignof, "alignof");
543 (48, Offsetof, "offsetof");
546 (51, Sizeof, "sizeof");
547 (52, Typeof, "typeof");
548 (53, Unsized, "unsized");
549 (54, Yield, "yield");
551 (56, Abstract, "abstract");
552 (57, Final, "final");
553 (58, Override, "override");
557 // looks like we can get rid of this completely...
558 pub type IdentInterner = StrInterner;
560 // if an interner exists in TLS, return it. Otherwise, prepare a
562 // FIXME(eddyb) #8726 This should probably use a task-local reference.
563 pub fn get_ident_interner() -> Rc<IdentInterner> {
564 thread_local!(static KEY: Rc<::parse::token::IdentInterner> = {
565 Rc::new(mk_fresh_ident_interner())
567 KEY.with(|k| k.clone())
570 /// Reset the ident interner to its initial state.
571 pub fn reset_ident_interner() {
572 let interner = get_ident_interner();
573 interner.reset(mk_fresh_ident_interner());
576 /// Represents a string stored in the task-local interner. Because the
577 /// interner lives for the life of the task, this can be safely treated as an
578 /// immortal string, as long as it never crosses between tasks.
580 /// FIXME(pcwalton): You must be careful about what you do in the destructors
581 /// of objects stored in TLS, because they may run after the interner is
582 /// destroyed. In particular, they must not access string contents. This can
583 /// be fixed in the future by just leaking all strings until task death
585 #[deriving(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)]
586 pub struct InternedString {
590 impl InternedString {
592 pub fn new(string: &'static str) -> InternedString {
594 string: RcStr::new(string),
599 fn new_from_rc_str(string: RcStr) -> InternedString {
606 pub fn get<'a>(&'a self) -> &'a str {
611 impl Deref for InternedString {
614 fn deref(&self) -> &str { &*self.string }
617 impl BytesContainer for InternedString {
618 fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
619 // FIXME #12938: This is a workaround for the incorrect signature
620 // of `BytesContainer`, which is itself a workaround for the lack of
623 let this = self.get();
624 mem::transmute::<&[u8],&[u8]>(this.container_as_bytes())
629 impl fmt::Show for InternedString {
630 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
631 write!(f, "{}", self.string[])
636 impl<'a> Equiv<&'a str> for InternedString {
637 fn equiv(&self, other: & &'a str) -> bool {
638 (*other) == self.string[]
642 impl<'a> PartialEq<&'a str> for InternedString {
644 fn eq(&self, other: & &'a str) -> bool {
645 PartialEq::eq(self.string[], *other)
648 fn ne(&self, other: & &'a str) -> bool {
649 PartialEq::ne(self.string[], *other)
653 impl<'a> PartialEq<InternedString > for &'a str {
655 fn eq(&self, other: &InternedString) -> bool {
656 PartialEq::eq(*self, other.string[])
659 fn ne(&self, other: &InternedString) -> bool {
660 PartialEq::ne(*self, other.string[])
664 impl<D:Decoder<E>, E> Decodable<D, E> for InternedString {
665 fn decode(d: &mut D) -> Result<InternedString, E> {
666 Ok(get_name(get_ident_interner().intern(
667 try!(d.read_str())[])))
671 impl<S:Encoder<E>, E> Encodable<S, E> for InternedString {
672 fn encode(&self, s: &mut S) -> Result<(), E> {
673 s.emit_str(self.string[])
677 /// Returns the string contents of a name, using the task-local interner.
679 pub fn get_name(name: ast::Name) -> InternedString {
680 let interner = get_ident_interner();
681 InternedString::new_from_rc_str(interner.get(name))
684 /// Returns the string contents of an identifier, using the task-local
687 pub fn get_ident(ident: ast::Ident) -> InternedString {
691 /// Interns and returns the string contents of an identifier, using the
692 /// task-local interner.
694 pub fn intern_and_get_ident(s: &str) -> InternedString {
698 /// Maps a string to its interned representation.
700 pub fn intern(s: &str) -> ast::Name {
701 get_ident_interner().intern(s)
704 /// Gensyms a new `ast::Name`, using the current interner.
706 pub fn gensym(s: &str) -> ast::Name {
707 get_ident_interner().gensym(s)
710 /// Maps a string to an identifier with an empty syntax context.
712 pub fn str_to_ident(s: &str) -> ast::Ident {
713 ast::Ident::new(intern(s))
716 /// Maps a string to a gensym'ed identifier.
718 pub fn gensym_ident(s: &str) -> ast::Ident {
719 ast::Ident::new(gensym(s))
722 // create a fresh name that maps to the same string as the old one.
723 // note that this guarantees that str_ptr_eq(ident_to_string(src),interner_get(fresh_name(src)));
724 // that is, that the new name and the old one are connected to ptr_eq strings.
725 pub fn fresh_name(src: &ast::Ident) -> ast::Name {
726 let interner = get_ident_interner();
727 interner.gensym_copy(src.name)
728 // following: debug version. Could work in final except that it's incompatible with
729 // good error messages and uses of struct names in ambiguous could-be-binding
730 // locations. Also definitely destroys the guarantee given above about ptr_eq.
731 /*let num = rand::thread_rng().gen_uint_range(0,0xffff);
732 gensym(format!("{}_{}",ident_to_string(src),num))*/
735 // create a fresh mark.
736 pub fn fresh_mark() -> ast::Mrk {
737 gensym("mark").uint() as u32
746 fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident {
747 ast::Ident { name: id.name, ctxt:mtwt::apply_mark(m, id.ctxt) }
750 #[test] fn mtwt_token_eq_test() {
751 assert!(Gt.mtwt_eq(&Gt));
752 let a = str_to_ident("bac");
753 let a1 = mark_ident(a,92);
754 assert!(Ident(a, ModName).mtwt_eq(&Ident(a1, Plain)));