1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use ast::{P, Ident, Name, Mrk};
16 use util::interner::{RcStr, StrInterner};
19 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::path::BytesContainer;
26 #[allow(non_camel_case_types)]
27 #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)]
41 #[allow(non_camel_case_types)]
42 #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)]
44 /* Expression-operator symbols. */
59 /* Structural symbols */
84 LIT_INT(i64, ast::IntTy),
85 LIT_UINT(u64, ast::UintTy),
86 LIT_INT_UNSUFFIXED(i64),
87 LIT_FLOAT(ast::Ident, ast::FloatTy),
88 LIT_FLOAT_UNSUFFIXED(ast::Ident),
90 LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
91 LIT_BINARY(Rc<Vec<u8>>),
92 LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
95 // An identifier carries an "is_mod_name" boolean,
96 // indicating whether `::` follows this token with no
97 // whitespace in between.
98 IDENT(ast::Ident, bool),
100 LIFETIME(ast::Ident),
102 /* For interpolation */
103 INTERPOLATED(Nonterminal),
105 DOC_COMMENT(ast::Ident),
109 #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash)]
110 /// An AST fragment carried by an `INTERPOLATED` token, for interpolation during macro expansion.
111 pub enum Nonterminal {
112 NtItem(Gc<ast::Item>),
113 NtBlock(P<ast::Block>),
114 NtStmt(Gc<ast::Stmt>),
115 NtPat( Gc<ast::Pat>),
116 NtExpr(Gc<ast::Expr>),
118 // The bool in NtIdent has the same meaning as in IDENT above (the `is_mod_name` flag):
119 NtIdent(Box<ast::Ident>, bool),
120 NtMeta(Gc<ast::MetaItem>), // a meta item: the stuff inside the brackets of an attribute
121 NtPath(Box<ast::Path>),
122 NtTT( Gc<ast::TokenTree>), // must be boxed (here via Gc) to break a circularity
123 NtMatchers(Vec<ast::Matcher> )
126 impl fmt::Show for Nonterminal { // shows only the variant name; the payload is elided as `(..)`
127 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
129 NtItem(..) => f.pad("NtItem(..)"),
130 NtBlock(..) => f.pad("NtBlock(..)"),
131 NtStmt(..) => f.pad("NtStmt(..)"),
132 NtPat(..) => f.pad("NtPat(..)"),
133 NtExpr(..) => f.pad("NtExpr(..)"),
134 NtTy(..) => f.pad("NtTy(..)"),
135 NtIdent(..) => f.pad("NtIdent(..)"),
136 NtMeta(..) => f.pad("NtMeta(..)"),
137 NtPath(..) => f.pad("NtPath(..)"),
138 NtTT(..) => f.pad("NtTT(..)"),
139 NtMatchers(..) => f.pad("NtMatchers(..)"),
144 pub fn binop_to_string(o: BinOp) -> &'static str {
159 pub fn to_string(t: &Token) -> String { // renders a token as a String, mostly as its source spelling
161 EQ => "=".to_string(),
162 LT => "<".to_string(),
163 LE => "<=".to_string(),
164 EQEQ => "==".to_string(),
165 NE => "!=".to_string(),
166 GE => ">=".to_string(),
167 GT => ">".to_string(),
168 NOT => "!".to_string(),
169 TILDE => "~".to_string(),
170 OROR => "||".to_string(),
171 ANDAND => "&&".to_string(),
172 BINOP(op) => binop_to_string(op).to_string(),
174 let mut s = binop_to_string(op).to_string();
179 /* Structural symbols */
180 AT => "@".to_string(),
181 DOT => ".".to_string(),
182 DOTDOT => "..".to_string(),
183 DOTDOTDOT => "...".to_string(),
184 COMMA => ",".to_string(),
185 SEMI => ";".to_string(),
186 COLON => ":".to_string(),
187 MOD_SEP => "::".to_string(),
188 RARROW => "->".to_string(),
189 LARROW => "<-".to_string(),
190 FAT_ARROW => "=>".to_string(),
191 LPAREN => "(".to_string(),
192 RPAREN => ")".to_string(),
193 LBRACKET => "[".to_string(),
194 RBRACKET => "]".to_string(),
195 LBRACE => "{".to_string(),
196 RBRACE => "}".to_string(),
197 POUND => "#".to_string(),
198 DOLLAR => "$".to_string(),
199 QUESTION => "?".to_string(),
203 let mut res = String::from_str("b'");
204 (b as char).escape_default(|c| {
211 let mut res = String::from_str("'");
212 c.escape_default(|c| {
218 LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)),
219 LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)),
220 LIT_INT_UNSUFFIXED(i) => { (i as u64).to_string() } // NOTE(review): the `as u64` cast would print a negative `i` as a large unsigned value; confirm negatives never reach here
222 let mut body = String::from_str(get_ident(s).get());
223 if body.as_slice().ends_with(".") {
224 body.push_char('0'); // pad a trailing `.` with `0` before the type suffix is appended: `10.f` is not a float literal
226 body.push_str(ast_util::float_ty_to_string(t).as_slice());
229 LIT_FLOAT_UNSUFFIXED(s) => {
230 let mut body = String::from_str(get_ident(s).get());
231 if body.as_slice().ends_with(".") {
232 body.push_char('0'); // pad a trailing `.` with `0`; `10.f` is not a float literal
237 format!("\"{}\"", get_ident(s).get().escape_default())
239 LIT_STR_RAW(s, n) => {
240 format!("r{delim}\"{string}\"{delim}",
241 delim="#".repeat(n), string=get_ident(s))
243 LIT_BINARY(ref v) => {
246 v.iter().map(|&b| b as char).collect::<String>().escape_default())
248 LIT_BINARY_RAW(ref s, n) => {
249 format!("br{delim}\"{string}\"{delim}",
250 delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
253 /* Name components */
254 IDENT(s, _) => get_ident(s).get().to_string(),
256 format!("{}", get_ident(s))
258 UNDERSCORE => "_".to_string(),
261 DOC_COMMENT(s) => get_ident(s).get().to_string(),
262 EOF => "<eof>".to_string(),
263 INTERPOLATED(ref nt) => {
265 &NtExpr(ref e) => ::print::pprust::expr_to_string(&**e),
266 &NtMeta(ref e) => ::print::pprust::meta_item_to_string(&**e),
268 let mut s = "an interpolated ".to_string();
270 NtItem(..) => s.push_str("item"),
271 NtBlock(..) => s.push_str("block"),
272 NtStmt(..) => s.push_str("statement"),
273 NtPat(..) => s.push_str("pattern"),
274 NtMeta(..) => fail!("should have been handled"), // NtMeta is pretty-printed by the earlier `&NtMeta` arm
275 NtExpr(..) => fail!("should have been handled above"), // NtExpr is pretty-printed by the earlier `&NtExpr` arm
276 NtTy(..) => s.push_str("type"),
277 NtIdent(..) => s.push_str("identifier"),
278 NtPath(..) => s.push_str("path"),
279 NtTT(..) => s.push_str("tt"),
280 NtMatchers(..) => s.push_str("matcher sequence")
289 pub fn can_begin_expr(t: &Token) -> bool {
299 LIT_INT(_, _) => true,
300 LIT_UINT(_, _) => true,
301 LIT_INT_UNSUFFIXED(_) => true,
302 LIT_FLOAT(_, _) => true,
303 LIT_FLOAT_UNSUFFIXED(_) => true,
305 LIT_STR_RAW(_, _) => true,
306 LIT_BINARY(_) => true,
307 LIT_BINARY_RAW(_, _) => true,
311 BINOP(MINUS) => true,
314 BINOP(OR) => true, // in lambda syntax
315 OROR => true, // in lambda syntax
317 INTERPOLATED(NtExpr(..))
318 | INTERPOLATED(NtIdent(..))
319 | INTERPOLATED(NtBlock(..))
320 | INTERPOLATED(NtPath(..)) => true,
325 /// Returns the matching close delimiter if this is an open delimiter,
326 /// otherwise `None`.
327 pub fn close_delimiter_for(t: &Token) -> Option<Token> {
329 LPAREN => Some(RPAREN),
330 LBRACE => Some(RBRACE),
331 LBRACKET => Some(RBRACKET),
336 pub fn is_lit(t: &Token) -> bool {
340 LIT_INT(_, _) => true,
341 LIT_UINT(_, _) => true,
342 LIT_INT_UNSUFFIXED(_) => true,
343 LIT_FLOAT(_, _) => true,
344 LIT_FLOAT_UNSUFFIXED(_) => true,
346 LIT_STR_RAW(_, _) => true,
347 LIT_BINARY(_) => true,
348 LIT_BINARY_RAW(_, _) => true,
353 pub fn is_ident(t: &Token) -> bool {
354 match *t { IDENT(_, _) => true, _ => false }
357 pub fn is_ident_or_path(t: &Token) -> bool {
359 IDENT(_, _) | INTERPOLATED(NtPath(..)) => true,
364 pub fn is_plain_ident(t: &Token) -> bool {
365 match *t { IDENT(_, false) => true, _ => false } // plain = an IDENT whose bool (the is_mod_name flag) is false
368 pub fn is_bar(t: &Token) -> bool {
369 match *t { BINOP(OR) | OROR => true, _ => false }
372 // Get the first "argument"
374 ( $first:expr, $( $remainder:expr, )* ) => ( $first )
377 // Get the last "argument" (has to be done recursively to avoid phoney local ambiguity error)
379 ( $first:expr, $( $remainder:expr, )+ ) => ( last!( $( $remainder, )+ ) );
380 ( $first:expr, ) => ( $first )
383 // In this macro, the names (the numbers) of the special identifiers must increase by exactly
384 // one per entry, starting at 0. The same holds for the keywords, except that they continue from
385 // the next number instead of restarting at zero. As an additional exception, a keyword may
386 // *also* reuse the name of a special identifier (duplicates are removed in the important place,
387 // the interner); "static" and "self" demonstrate this.
388 macro_rules! declare_special_idents_and_keywords {(
389 // So now, in these rules, why is each definition parenthesised?
390 // Answer: otherwise we get a spurious local ambiguity bug on the "}"
391 pub mod special_idents {
392 $( ($si_name:expr, $si_static:ident, $si_str:expr); )*
397 $( ($sk_name:expr, $sk_variant:ident, $sk_str:expr); )*
399 $( ($rk_name:expr, $rk_variant:ident, $rk_str:expr); )*
402 static STRICT_KEYWORD_START: Name = first!($( $sk_name, )*);
403 static STRICT_KEYWORD_FINAL: Name = last!($( $sk_name, )*);
404 static RESERVED_KEYWORD_START: Name = first!($( $rk_name, )*);
405 static RESERVED_KEYWORD_FINAL: Name = last!($( $rk_name, )*);
407 pub mod special_idents {
409 $( pub static $si_static: Ident = Ident { name: $si_name, ctxt: 0 }; )*
413 * All the valid words that have meaning in the Rust language.
415 * Rust keywords are either 'strict' or 'reserved'. Strict keywords may not
416 * appear as identifiers at all. Reserved keywords are not used anywhere in
417 * the language and may not appear as identifiers.
428 pub fn to_ident(&self) -> Ident {
430 $( $sk_variant => Ident { name: $sk_name, ctxt: 0 }, )*
431 $( $rk_variant => Ident { name: $rk_name, ctxt: 0 }, )*
437 fn mk_fresh_ident_interner() -> IdentInterner {
438 // The indices here must correspond to the numbers in
439 // special_idents, in Keyword to_ident(), and in static
441 let mut init_vec = Vec::new();
442 $(init_vec.push($si_str);)*
443 $(init_vec.push($sk_str);)*
444 $(init_vec.push($rk_str);)*
445 interner::StrInterner::prefill(init_vec.as_slice())
449 // If the special idents get renumbered, remember to modify these two as appropriate
450 static SELF_KEYWORD_NAME: Name = 1;
451 static STATIC_KEYWORD_NAME: Name = 2;
453 // NB: leaving holes in the ident table is bad! A different ident will get
454 // interned with the id from the hole, but it will lie between the min and max
455 // of the reserved words, and thus be tagged as "reserved".
457 declare_special_idents_and_keywords! {
458 pub mod special_idents {
459 // These ones are statics
461 (super::SELF_KEYWORD_NAME, self_, "self");
462 (super::STATIC_KEYWORD_NAME, statik, "static");
463 (3, static_lifetime, "'static");
467 (5, matchers, "matchers");
469 // outside of libsyntax
470 (6, clownshoe_abi, "__rust_abi");
471 (7, opaque, "<opaque>");
472 (8, unnamed_field, "<unnamed_field>");
473 (9, type_self, "Self");
477 // These ones are variants of the Keyword enum
481 (11, Break, "break");
482 (12, Crate, "crate");
485 (15, Extern, "extern");
486 (16, False, "false");
494 (24, Match, "match");
500 (30, Return, "return");
501 // Static and Self are also special idents (prefill de-dupes)
502 (super::STATIC_KEYWORD_NAME, Static, "static");
503 (super::SELF_KEYWORD_NAME, Self, "self");
504 (31, Struct, "struct");
505 (32, Super, "super");
507 (34, Trait, "trait");
509 (36, Unsafe, "unsafe");
511 (38, Virtual, "virtual");
512 (39, While, "while");
513 (40, Continue, "continue");
516 (43, Const, "const");
519 (44, Alignof, "alignof");
521 (46, Offsetof, "offsetof");
524 (49, Sizeof, "sizeof");
525 (50, Typeof, "typeof");
526 (51, Unsized, "unsized");
527 (52, Yield, "yield");
533 * Maps a token to a record specifying the corresponding binary
536 pub fn token_to_binop(tok: &Token) -> Option<ast::BinOp> {
538 BINOP(STAR) => Some(ast::BiMul),
539 BINOP(SLASH) => Some(ast::BiDiv),
540 BINOP(PERCENT) => Some(ast::BiRem),
541 BINOP(PLUS) => Some(ast::BiAdd),
542 BINOP(MINUS) => Some(ast::BiSub),
543 BINOP(SHL) => Some(ast::BiShl),
544 BINOP(SHR) => Some(ast::BiShr),
545 BINOP(AND) => Some(ast::BiBitAnd),
546 BINOP(CARET) => Some(ast::BiBitXor),
547 BINOP(OR) => Some(ast::BiBitOr),
548 LT => Some(ast::BiLt),
549 LE => Some(ast::BiLe),
550 GE => Some(ast::BiGe),
551 GT => Some(ast::BiGt),
552 EQEQ => Some(ast::BiEq),
553 NE => Some(ast::BiNe),
554 ANDAND => Some(ast::BiAnd),
555 OROR => Some(ast::BiOr),
560 // looks like we can get rid of this completely...
561 pub type IdentInterner = StrInterner;
563 // if an interner exists in TLS, return it. Otherwise, prepare a
565 // FIXME(eddyb) #8726 This should probably use a task-local reference.
566 pub fn get_ident_interner() -> Rc<IdentInterner> {
567 local_data_key!(key: Rc<::parse::token::IdentInterner>)
569 Some(interner) => interner.clone(),
571 let interner = Rc::new(mk_fresh_ident_interner());
572 key.replace(Some(interner.clone()));
578 /// Represents a string stored in the task-local interner. Because the
579 /// interner lives for the life of the task, this can be safely treated as an
580 /// immortal string, as long as it never crosses between tasks.
582 /// FIXME(pcwalton): You must be careful about what you do in the destructors
583 /// of objects stored in TLS, because they may run after the interner is
584 /// destroyed. In particular, they must not access string contents. This can
585 /// be fixed in the future by just leaking all strings until task death
587 #[deriving(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)]
588 pub struct InternedString {
592 impl InternedString {
594 pub fn new(string: &'static str) -> InternedString {
596 string: RcStr::new(string),
601 fn new_from_rc_str(string: RcStr) -> InternedString {
608 pub fn get<'a>(&'a self) -> &'a str {
609 self.string.as_slice()
613 impl BytesContainer for InternedString {
614 fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
615 // FIXME(pcwalton): This is a workaround for the incorrect signature
616 // of `BytesContainer`, which is itself a workaround for the lack of
619 let this = self.get();
620 mem::transmute(this.container_as_bytes())
625 impl fmt::Show for InternedString {
626 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
627 write!(f, "{}", self.string.as_slice())
631 impl<'a> Equiv<&'a str> for InternedString {
632 fn equiv(&self, other: & &'a str) -> bool {
633 (*other) == self.string.as_slice()
637 impl<D:Decoder<E>, E> Decodable<D, E> for InternedString {
638 fn decode(d: &mut D) -> Result<InternedString, E> {
639 Ok(get_name(get_ident_interner().intern(
640 try!(d.read_str()).as_slice())))
644 impl<S:Encoder<E>, E> Encodable<S, E> for InternedString {
645 fn encode(&self, s: &mut S) -> Result<(), E> {
646 s.emit_str(self.string.as_slice())
650 /// Returns the string contents of a name, using the task-local interner.
652 pub fn get_name(name: Name) -> InternedString {
653 let interner = get_ident_interner();
654 InternedString::new_from_rc_str(interner.get(name))
657 /// Returns the string contents of an identifier, using the task-local
660 pub fn get_ident(ident: Ident) -> InternedString {
664 /// Interns and returns the string contents of an identifier, using the
665 /// task-local interner.
667 pub fn intern_and_get_ident(s: &str) -> InternedString {
671 /// Maps a string to its interned representation.
673 pub fn intern(s: &str) -> Name {
674 get_ident_interner().intern(s)
677 /// Gensyms a new `Name` for `s`, using the current interner.
679 pub fn gensym(s: &str) -> Name {
680 get_ident_interner().gensym(s)
683 /// Maps a string to an identifier with an empty syntax context.
685 pub fn str_to_ident(s: &str) -> ast::Ident {
686 ast::Ident::new(intern(s))
689 /// Maps a string to a gensym'ed identifier.
691 pub fn gensym_ident(s: &str) -> ast::Ident {
692 ast::Ident::new(gensym(s))
695 // Creates a fresh name that maps to the same string as the old one.
696 // Note that this guarantees str_ptr_eq(ident_to_string(src), interner_get(fresh_name(src)));
697 // that is, the new name and the old one are connected to ptr_eq strings.
698 pub fn fresh_name(src: &ast::Ident) -> Name {
699 let interner = get_ident_interner();
700 interner.gensym_copy(src.name)
701 // The commented-out code below is a debug version. It could work in final except that it is
702 // incompatible with good error messages and with uses of struct names in ambiguous
703 // could-be-binding locations. It also definitely destroys the ptr_eq guarantee given above.
704 /*let num = rand::task_rng().gen_uint_range(0,0xffff);
705 gensym(format!("{}_{}",ident_to_string(src),num))*/
708 // create a fresh mark.
709 pub fn fresh_mark() -> Mrk {
713 // See the macro above about the types of keywords
715 pub fn is_keyword(kw: keywords::Keyword, tok: &Token) -> bool {
717 token::IDENT(sid, false) => { kw.to_ident().name == sid.name }
722 pub fn is_any_keyword(tok: &Token) -> bool {
724 token::IDENT(sid, false) => match sid.name {
725 SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME |
726 STRICT_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true,
733 pub fn is_strict_keyword(tok: &Token) -> bool {
735 token::IDENT(sid, false) => match sid.name {
736 SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME |
737 STRICT_KEYWORD_START .. STRICT_KEYWORD_FINAL => true,
744 pub fn is_reserved_keyword(tok: &Token) -> bool {
746 token::IDENT(sid, false) => match sid.name {
747 RESERVED_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true,
754 pub fn mtwt_token_eq(t1 : &Token, t2 : &Token) -> bool {
756 (&IDENT(id1,_),&IDENT(id2,_)) | (&LIFETIME(id1),&LIFETIME(id2)) =>
757 mtwt::resolve(id1) == mtwt::resolve(id2),
769 fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident {
770 ast::Ident{name:id.name,ctxt:mtwt::apply_mark(m,id.ctxt)}
773 #[test] fn mtwt_token_eq_test() {
774 assert!(mtwt_token_eq(&GT,&GT)); // restored `&GT` (it had been mangled to `>`): a token equals itself
775 let a = str_to_ident("bac");
776 let a1 = mark_ident(a,92); // same name as `a`, with mark 92 applied to its syntax context
777 assert!(mtwt_token_eq(&IDENT(a,true),&IDENT(a1,false))); // the differing bool flags are ignored by mtwt_token_eq; NOTE(review): presumably the mark alone does not change what the ident resolves to