1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use ast::{P, Ident, Name, Mrk};
16 use util::interner::{RcStr, StrInterner};
19 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::path::BytesContainer;
25 use std::strbuf::StrBuf;
27 #[allow(non_camel_case_types)]
28 #[deriving(Clone, Encodable, Decodable, Eq, TotalEq, Hash, Show)]
// NOTE(review): the `enum` header for these variants is not visible here. The
// functions in this file match `Token` values against them (see `to_str` and
// `is_lit`), so this is presumably the body of the `Token` enum -- confirm.
42 #[allow(non_camel_case_types)]
43 #[deriving(Clone, Encodable, Decodable, Eq, TotalEq, Hash, Show)]
45 /* Expression-operator symbols. */
60 /* Structural symbols */
// Integer literals carry their value inline; the suffixed forms also carry the
// integer type, which `to_str` hands to `ast_util::int_ty_to_str` and
// `ast_util::uint_ty_to_str`.
84 LIT_INT(i64, ast::IntTy),
85 LIT_UINT(u64, ast::UintTy),
86 LIT_INT_UNSUFFIXED(i64),
// Float literals keep their text as an interned identifier; `to_str` reads the
// text back with `get_ident`.
87 LIT_FLOAT(ast::Ident, ast::FloatTy),
88 LIT_FLOAT_UNSUFFIXED(ast::Ident),
90 LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
93 // an identifier contains an "is_mod_name" boolean,
94 // indicating whether :: follows this token with no
95 // whitespace in between.
96 IDENT(ast::Ident, bool),
100 /* For interpolation */
// Carries a `Nonterminal`; `to_str` either pretty-prints it or names its kind.
101 INTERPOLATED(Nonterminal),
// The comment text is interned; `to_str` returns it via `get_ident`.
103 DOC_COMMENT(ast::Ident),
107 #[deriving(Clone, Encodable, Decodable, Eq, TotalEq, Hash)]
108 /// A syntax fragment carried by an `INTERPOLATED` token during macro expansion.
// `Show` is not derived here; it is implemented by hand for this type.
109 pub enum Nonterminal {
111 NtBlock(P<ast::Block>),
// NOTE(review): the `bool` presumably mirrors the "is_mod_name" flag of
// `IDENT` -- confirm against the code that builds `NtIdent`.
116 NtIdent(~ast::Ident, bool),
117 NtMeta(@ast::MetaItem), // stuff inside brackets for attributes
119 NtTT( @ast::TokenTree), // must be behind an `@` pointer to break a circularity
120 NtMatchers(Vec<ast::Matcher> )
// Hand-written `Show`: every variant prints as its name followed by `(..)`,
// so the payload itself is never formatted.
123 impl fmt::Show for Nonterminal {
124 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// `f.pad` writes the fixed label through the formatter.
126 NtItem(..) => f.pad("NtItem(..)"),
127 NtBlock(..) => f.pad("NtBlock(..)"),
128 NtStmt(..) => f.pad("NtStmt(..)"),
129 NtPat(..) => f.pad("NtPat(..)"),
130 NtExpr(..) => f.pad("NtExpr(..)"),
131 NtTy(..) => f.pad("NtTy(..)"),
132 NtIdent(..) => f.pad("NtIdent(..)"),
133 NtMeta(..) => f.pad("NtMeta(..)"),
134 NtPath(..) => f.pad("NtPath(..)"),
135 NtTT(..) => f.pad("NtTT(..)"),
136 NtMatchers(..) => f.pad("NtMatchers(..)"),
/// Returns the source spelling of the binary operator `o` as an owned string.
///
/// `to_str` uses this for `BINOP` tokens and, with `=` appended, for
/// `BINOPEQ` tokens.
141 pub fn binop_to_str(o: BinOp) -> ~str {
143 PLUS => "+".to_owned(),
144 MINUS => "-".to_owned(),
145 STAR => "*".to_owned(),
146 SLASH => "/".to_owned(),
147 PERCENT => "%".to_owned(),
148 CARET => "^".to_owned(),
149 AND => "&".to_owned(),
150 OR => "|".to_owned(),
151 SHL => "<<".to_owned(),
152 SHR => ">>".to_owned()
/// Renders the token `t` as an owned string.
///
/// Operator and punctuation tokens map to their literal spelling. Literal and
/// identifier tokens are rebuilt from their payload, with interned text looked
/// up through `get_ident`. Interpolated expressions and meta items are
/// pretty-printed through `::print::pprust`; every other interpolated
/// nonterminal is described as "an interpolated <kind>".
156 pub fn to_str(t: &Token) -> ~str {
158 EQ => "=".to_owned(),
159 LT => "<".to_owned(),
160 LE => "<=".to_owned(),
161 EQEQ => "==".to_owned(),
162 NE => "!=".to_owned(),
163 GE => ">=".to_owned(),
164 GT => ">".to_owned(),
165 NOT => "!".to_owned(),
166 TILDE => "~".to_owned(),
167 OROR => "||".to_owned(),
168 ANDAND => "&&".to_owned(),
169 BINOP(op) => binop_to_str(op),
// A compound assignment is the operator's spelling followed by `=`.
170 BINOPEQ(op) => binop_to_str(op) + "=",
172 /* Structural symbols */
173 AT => "@".to_owned(),
174 DOT => ".".to_owned(),
175 DOTDOT => "..".to_owned(),
176 DOTDOTDOT => "...".to_owned(),
177 COMMA => ",".to_owned(),
178 SEMI => ";".to_owned(),
179 COLON => ":".to_owned(),
180 MOD_SEP => "::".to_owned(),
181 RARROW => "->".to_owned(),
182 LARROW => "<-".to_owned(),
183 DARROW => "<->".to_owned(),
184 FAT_ARROW => "=>".to_owned(),
185 LPAREN => "(".to_owned(),
186 RPAREN => ")".to_owned(),
187 LBRACKET => "[".to_owned(),
188 RBRACKET => "]".to_owned(),
189 LBRACE => "{".to_owned(),
190 RBRACE => "}".to_owned(),
191 POUND => "#".to_owned(),
192 DOLLAR => "$".to_owned(),
// Character literal: start from the opening quote; the character itself goes
// through `escape_default`.
196 let mut res = StrBuf::from_str("'");
197 c.escape_default(|c| {
203 LIT_INT(i, t) => ast_util::int_ty_to_str(t, Some(i)),
204 LIT_UINT(u, t) => ast_util::uint_ty_to_str(t, Some(u)),
205 LIT_INT_UNSUFFIXED(i) => { i.to_str() }
// Float with a type suffix: the interned text, padded with "0" if it ends in
// ".", followed by the suffix from `float_ty_to_str`.
207 let mut body = StrBuf::from_str(get_ident(s).get());
208 if body.as_slice().ends_with(".") {
209 body.push_char('0'); // `10.f` is not a float literal
211 body.push_str(ast_util::float_ty_to_str(t));
214 LIT_FLOAT_UNSUFFIXED(s) => {
215 let mut body = StrBuf::from_str(get_ident(s).get());
216 if body.as_slice().ends_with(".") {
217 body.push_char('0'); // `10.f` is not a float literal
// String literal: the escaped contents between double quotes.
222 format!("\"{}\"", get_ident(s).get().escape_default())
// Raw string: `r`, `n` hash marks, the quoted text, then `n` hash marks again.
224 LIT_STR_RAW(s, n) => {
225 format!("r{delim}\"{string}\"{delim}",
226 delim="#".repeat(n), string=get_ident(s))
229 /* Name components */
230 IDENT(s, _) => get_ident(s).get().to_str(),
// The interned name prefixed with a single quote.
232 format!("'{}", get_ident(s))
234 UNDERSCORE => "_".to_owned(),
237 DOC_COMMENT(s) => get_ident(s).get().to_str(),
238 EOF => "<eof>".to_owned(),
239 INTERPOLATED(ref nt) => {
// Expressions and meta items are pretty-printed in full.
241 &NtExpr(e) => ::print::pprust::expr_to_str(e),
242 &NtMeta(e) => ::print::pprust::meta_item_to_str(e),
// Every other nonterminal is only named by its kind.
244 "an interpolated ".to_owned() +
246 NtItem(..) => "item".to_owned(),
247 NtBlock(..) => "block".to_owned(),
248 NtStmt(..) => "statement".to_owned(),
249 NtPat(..) => "pattern".to_owned(),
// `NtMeta` and `NtExpr` are matched by the earlier arms, so reaching either of
// these two arms is treated as a bug.
250 NtMeta(..) => fail!("should have been handled"),
251 NtExpr(..) => fail!("should have been handled above"),
252 NtTy(..) => "type".to_owned(),
253 NtIdent(..) => "identifier".to_owned(),
254 NtPath(..) => "path".to_owned(),
255 NtTT(..) => "tt".to_owned(),
256 NtMatchers(..) => "matcher sequence".to_owned()
/// Reports whether the token `t` can begin an expression.
///
/// The arms shown here accept literal tokens, `-`, `|` and `||`, and
/// interpolated expressions, identifiers, blocks and paths.
264 pub fn can_begin_expr(t: &Token) -> bool {
273 LIT_INT(_, _) => true,
274 LIT_UINT(_, _) => true,
275 LIT_INT_UNSUFFIXED(_) => true,
276 LIT_FLOAT(_, _) => true,
277 LIT_FLOAT_UNSUFFIXED(_) => true,
279 LIT_STR_RAW(_, _) => true,
283 BINOP(MINUS) => true,
286 BINOP(OR) => true, // in lambda syntax
287 OROR => true, // in lambda syntax
// Nonterminals already parsed by macro expansion that may stand at the start
// of an expression.
289 INTERPOLATED(NtExpr(..))
290 | INTERPOLATED(NtIdent(..))
291 | INTERPOLATED(NtBlock(..))
292 | INTERPOLATED(NtPath(..)) => true,
297 /// Returns the matching close delimiter if this is an open delimiter,
298 /// otherwise `None`.
299 pub fn close_delimiter_for(t: &Token) -> Option<Token> {
// The open delimiters listed here are `(`, `{` and `[`; each maps to its
// closing counterpart.
301 LPAREN => Some(RPAREN),
302 LBRACE => Some(RBRACE),
303 LBRACKET => Some(RBRACKET),
/// Reports whether `t` is a literal token.
///
/// The arms shown here cover the integer, float and raw-string literal
/// variants; the payload is ignored.
308 pub fn is_lit(t: &Token) -> bool {
311 LIT_INT(_, _) => true,
312 LIT_UINT(_, _) => true,
313 LIT_INT_UNSUFFIXED(_) => true,
314 LIT_FLOAT(_, _) => true,
315 LIT_FLOAT_UNSUFFIXED(_) => true,
317 LIT_STR_RAW(_, _) => true,
/// Returns `true` if `t` is an `IDENT` token, whatever its name and
/// "is_mod_name" flag are.
322 pub fn is_ident(t: &Token) -> bool {
323 match *t { IDENT(_, _) => true, _ => false }
/// Returns `true` if `t` is an `IDENT` token or an interpolated path.
326 pub fn is_ident_or_path(t: &Token) -> bool {
// Both fields of `IDENT` and the payload of `NtPath` are ignored.
328 IDENT(_, _) | INTERPOLATED(NtPath(..)) => true,
/// Returns `true` if `t` is an `IDENT` token whose "is_mod_name" flag is
/// `false`, i.e. an identifier that is not directly followed by `::`.
333 pub fn is_plain_ident(t: &Token) -> bool {
334 match *t { IDENT(_, false) => true, _ => false }
/// Returns `true` if `t` is the `|` operator token (`BINOP(OR)`) or the `||`
/// token (`OROR`).
337 pub fn is_bar(t: &Token) -> bool {
338 match *t { BINOP(OR) | OROR => true, _ => false }
341 // Expands to the first expression of a comma-terminated list; the rest is discarded.
343 ( $first:expr, $( $remainder:expr, )* ) => ( $first )
346 // Expands to the last expression of a comma-terminated list (has to be done recursively to avoid a phony local ambiguity error)
// More than one element: drop the head and recurse on the remainder.
348 ( $first:expr, $( $remainder:expr, )+ ) => ( last!( $( $remainder, )+ ) );
// Exactly one element left: that element is the result.
349 ( $first:expr, ) => ( $first )
352 // In this macro, there is the requirement that the name (the number) must be monotonically
353 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
354 // except starting from the next number instead of zero, and with the additional exception that
355 // special identifiers are *also* allowed (they are deduplicated in the important place, the
356 // interner), an exception which is demonstrated by "static" and "self".
357 macro_rules! declare_special_idents_and_keywords {(
358 // So now, in these rules, why is each definition parenthesised?
359 // Answer: otherwise we get a spurious local ambiguity bug on the "}"
360 pub mod special_idents {
// Special identifiers: (name number, identifier of the static, string).
361 $( ($si_name:expr, $si_static:ident, $si_str:expr); )*
// Strict keywords: (name number, `Keyword` variant, string).
366 $( ($sk_name:expr, $sk_variant:ident, $sk_str:expr); )*
// Reserved keywords, in the same shape as the strict ones.
368 $( ($rk_name:expr, $rk_variant:ident, $rk_str:expr); )*
// Bounds of the two keyword name ranges, taken from the first and the last
// entry of each keyword list.
371 static STRICT_KEYWORD_START: Name = first!($( $sk_name, )*);
372 static STRICT_KEYWORD_FINAL: Name = last!($( $sk_name, )*);
373 static RESERVED_KEYWORD_START: Name = first!($( $rk_name, )*);
374 static RESERVED_KEYWORD_FINAL: Name = last!($( $rk_name, )*);
376 pub mod special_idents {
// One `Ident` static per special identifier, with `ctxt` set to 0.
378 $( pub static $si_static: Ident = Ident { name: $si_name, ctxt: 0 }; )*
382 * All the valid words that have meaning in the Rust language.
384 * Rust keywords are either 'strict' or 'reserved'. Strict keywords may not
385 * appear as identifiers at all. Reserved keywords are not used anywhere in
386 * the language and may not appear as identifiers.
397 pub fn to_ident(&self) -> Ident {
// Each keyword variant maps to an `Ident` carrying its declared name number
// and `ctxt` 0.
399 $( $sk_variant => Ident { name: $sk_name, ctxt: 0 }, )*
400 $( $rk_variant => Ident { name: $rk_name, ctxt: 0 }, )*
406 fn mk_fresh_ident_interner() -> IdentInterner {
407 // The indices here must correspond to the numbers in
408 // special_idents, in Keyword to_ident(), and in static
410 let mut init_vec = Vec::new();
// Push order: special identifiers, then strict keywords, then reserved
// keywords. The resulting vector is what the interner is prefilled with.
411 $(init_vec.push($si_str);)*
412 $(init_vec.push($sk_str);)*
413 $(init_vec.push($rk_str);)*
414 interner::StrInterner::prefill(init_vec.as_slice())
418 // If the special idents get renumbered, remember to modify these two as appropriate
// These names are used for "self" and "static" both in the special-identifier
// list and in the keyword list of the `declare_special_idents_and_keywords!`
// invocation, and in the keyword predicates `is_any_keyword` and
// `is_strict_keyword`.
419 static SELF_KEYWORD_NAME: Name = 1;
420 static STATIC_KEYWORD_NAME: Name = 2;
422 declare_special_idents_and_keywords! {
// Every entry is a `(name number, identifier, string)` triple, as matched by
// the rules of the macro.
423 pub mod special_idents {
424 // These ones are statics
// "self" and "static" use the shared name constants because the same two
// strings are listed again among the keywords.
426 (super::SELF_KEYWORD_NAME, self_, "self");
427 (super::STATIC_KEYWORD_NAME, statik, "static");
431 (4, matchers, "matchers");
433 // outside of libsyntax
434 (5, clownshoe_abi, "__rust_abi");
435 (6, opaque, "<opaque>");
436 (7, unnamed_field, "<unnamed_field>");
437 (8, type_self, "Self");
441 // These ones are variants of the Keyword enum
445 (10, Break, "break");
446 (11, Const, "const");
447 (12, Crate, "crate");
450 (15, Extern, "extern");
451 (16, False, "false");
459 (24, Match, "match");
465 (30, Return, "return");
466 // Static and Self are also special idents (prefill de-dupes)
467 (super::STATIC_KEYWORD_NAME, Static, "static");
468 (super::SELF_KEYWORD_NAME, Self, "self");
469 (31, Struct, "struct");
470 (32, Super, "super");
472 (34, Trait, "trait");
474 (36, Unsafe, "unsafe");
476 (38, Virtual, "virtual");
477 (39, While, "while");
478 (40, Continue, "continue");
483 (43, Alignof, "alignof");
485 (45, Offsetof, "offsetof");
488 (48, Sizeof, "sizeof");
489 (49, Typeof, "typeof");
490 (50, Unsized, "unsized");
491 (51, Yield, "yield");
497 * Maps a token to a record specifying the corresponding binary
500 pub fn token_to_binop(tok: &Token) -> Option<ast::BinOp> {
// Arithmetic, shift and bitwise operators arrive wrapped in `BINOP`;
// comparison and logical operators are tokens of their own.
502 BINOP(STAR) => Some(ast::BiMul),
503 BINOP(SLASH) => Some(ast::BiDiv),
504 BINOP(PERCENT) => Some(ast::BiRem),
505 BINOP(PLUS) => Some(ast::BiAdd),
506 BINOP(MINUS) => Some(ast::BiSub),
507 BINOP(SHL) => Some(ast::BiShl),
508 BINOP(SHR) => Some(ast::BiShr),
509 BINOP(AND) => Some(ast::BiBitAnd),
510 BINOP(CARET) => Some(ast::BiBitXor),
511 BINOP(OR) => Some(ast::BiBitOr),
512 LT => Some(ast::BiLt),
513 LE => Some(ast::BiLe),
514 GE => Some(ast::BiGe),
515 GT => Some(ast::BiGt),
516 EQEQ => Some(ast::BiEq),
517 NE => Some(ast::BiNe),
518 ANDAND => Some(ast::BiAnd),
519 OROR => Some(ast::BiOr),
524 // `IdentInterner` is only another name for `StrInterner`; it looks like we can get rid of this completely...
525 pub type IdentInterner = StrInterner;
527 // if an interner exists in TLS, return it. Otherwise, prepare a
529 // FIXME(eddyb) #8726 This should probably use a task-local reference.
530 pub fn get_ident_interner() -> Rc<IdentInterner> {
531 local_data_key!(key: Rc<::parse::token::IdentInterner>)
// Clone the `Rc` handle out of task-local data if one is already stored there.
532 match local_data::get(key, |k| k.map(|k| k.clone())) {
533 Some(interner) => interner,
// Nothing stored yet: build an interner with `mk_fresh_ident_interner` and
// store a clone of the handle under `key`.
535 let interner = Rc::new(mk_fresh_ident_interner());
536 local_data::set(key, interner.clone());
542 /// Represents a string stored in the task-local interner. Because the
543 /// interner lives for the life of the task, this can be safely treated as an
544 /// immortal string, as long as it never crosses between tasks.
546 /// FIXME(pcwalton): You must be careful about what you do in the destructors
547 /// of objects stored in TLS, because they may run after the interner is
548 /// destroyed. In particular, they must not access string contents. This can
549 /// be fixed in the future by just leaking all strings until task death
551 #[deriving(Clone, Eq, Hash, Ord, TotalEq, TotalOrd)]
552 pub struct InternedString {
// The methods and trait impls of this type read the contents through the
// `string` field, which holds an `RcStr`.
556 impl InternedString {
// Builds the value from a `'static` string by wrapping it with `RcStr::new`.
558 pub fn new(string: &'static str) -> InternedString {
560 string: RcStr::new(string),
// Private constructor from an existing `RcStr`; `get_name` uses it for the
// `RcStr` returned by the interner.
565 fn new_from_rc_str(string: RcStr) -> InternedString {
// Borrows the string contents for as long as `self` is borrowed.
572 pub fn get<'a>(&'a self) -> &'a str {
573 self.string.as_slice()
// Exposes the string contents as bytes.
577 impl BytesContainer for InternedString {
578 fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
579 // FIXME(pcwalton): This is a workaround for the incorrect signature
580 // of `BytesContainer`, which is itself a workaround for the lack of
583 let this = self.get();
// The bytes come from the `&str` obtained above. NOTE(review): the transmute
// presumably only adjusts the lifetime to the `'a` of the signature -- confirm.
584 cast::transmute(this.container_as_bytes())
// Displays the string contents themselves, written with a plain `{}`.
589 impl fmt::Show for InternedString {
590 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
591 write!(f.buf, "{}", self.string.as_slice())
// Lets an `InternedString` be compared against a `&str` by contents.
595 impl<'a> Equiv<&'a str> for InternedString {
596 fn equiv(&self, other: & &'a str) -> bool {
597 (*other) == self.string.as_slice()
// Decoding reads a string, interns it, and returns the interned copy.
601 impl<D:Decoder<E>, E> Decodable<D, E> for InternedString {
602 fn decode(d: &mut D) -> Result<InternedString, E> {
// `try!` propagates a read error; otherwise the string is interned in the
// interner from `get_ident_interner` and fetched back through `get_name`.
603 Ok(get_name(get_ident_interner().intern(try!(d.read_str()))))
// Encoding emits the string contents with `emit_str`.
607 impl<S:Encoder<E>, E> Encodable<S, E> for InternedString {
608 fn encode(&self, s: &mut S) -> Result<(), E> {
609 s.emit_str(self.string.as_slice())
613 /// Returns the string contents of a name, using the task-local interner.
615 pub fn get_name(name: Name) -> InternedString {
// Fetch the interner handle, look `name` up in it, and wrap the resulting
// `RcStr`.
616 let interner = get_ident_interner();
617 InternedString::new_from_rc_str(interner.get(name))
620 /// Returns the string contents of an identifier, using the task-local
623 pub fn get_ident(ident: Ident) -> InternedString {
627 /// Interns and returns the string contents of an identifier, using the
628 /// task-local interner.
630 pub fn intern_and_get_ident(s: &str) -> InternedString {
634 /// Interns `s` in the interner from `get_ident_interner` and returns its `Name`.
636 pub fn intern(s: &str) -> Name {
637 get_ident_interner().intern(s)
640 /// Asks the current interner to gensym a new `Name` for `s`.
642 pub fn gensym(s: &str) -> Name {
643 get_ident_interner().gensym(s)
646 /// Maps a string to an identifier with an empty syntax context.
648 pub fn str_to_ident(s: &str) -> ast::Ident {
// `intern` supplies the name; `ast::Ident::new` builds the identifier from it.
649 ast::Ident::new(intern(s))
652 /// Maps a string to a gensym'ed identifier.
654 pub fn gensym_ident(s: &str) -> ast::Ident {
// Same as `str_to_ident`, except that the name comes from `gensym` rather
// than `intern`.
655 ast::Ident::new(gensym(s))
658 // Creates a fresh name that maps to the same string as the old one.
659 // Note that this guarantees that str_ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
660 // that is, that the new name and the old one are connected to ptr_eq strings.
661 pub fn fresh_name(src: &ast::Ident) -> Name {
662 let interner = get_ident_interner();
// Only the `name` of `src` is used; its syntax context plays no part here.
663 interner.gensym_copy(src.name)
664 // The following is a debug version. Could work in final except that it's incompatible with
665 // good error messages and uses of struct names in ambiguous could-be-binding
666 // locations. Also definitely destroys the guarantee given above about ptr_eq.
667 /*let num = rand::task_rng().gen_uint_range(0,0xffff);
668 gensym(format!("{}_{}",ident_to_str(src),num))*/
671 // create a fresh mark.
672 pub fn fresh_mark() -> Mrk {
676 // See the macro above about the types of keywords
/// Reports whether `tok` is the keyword `kw`.
678 pub fn is_keyword(kw: keywords::Keyword, tok: &Token) -> bool {
// Only an `IDENT` whose "is_mod_name" flag is `false` can match. The
// comparison is on the `name` alone, so the syntax context is not consulted.
680 token::IDENT(sid, false) => { kw.to_ident().name == sid.name }
/// Reports whether `tok` is a keyword of either kind, strict or reserved.
685 pub fn is_any_keyword(tok: &Token) -> bool {
687 token::IDENT(sid, false) => match sid.name {
// "self" and "static" are listed on their own because their names come from
// the special-identifier numbering; everything else is covered by the range
// from the first strict keyword to the last reserved keyword.
688 SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME |
689 STRICT_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true,
/// Reports whether `tok` is a strict keyword.
696 pub fn is_strict_keyword(tok: &Token) -> bool {
698 token::IDENT(sid, false) => match sid.name {
// "self" and "static" count as strict keywords; the range covers the names of
// the strict keyword list only.
699 SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME |
700 STRICT_KEYWORD_START .. STRICT_KEYWORD_FINAL => true,
/// Reports whether `tok` is a reserved keyword.
707 pub fn is_reserved_keyword(tok: &Token) -> bool {
709 token::IDENT(sid, false) => match sid.name {
// Unlike the strict check, "self" and "static" are not included here.
710 RESERVED_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true,
/// Compares two tokens, treating identifiers and lifetimes as equal when
/// `mtwt::resolve` gives the same result for both.
717 pub fn mtwt_token_eq(t1 : &Token, t2 : &Token) -> bool {
// The "is_mod_name" flag of `IDENT` is ignored by this comparison.
719 (&IDENT(id1,_),&IDENT(id2,_)) | (&LIFETIME(id1),&LIFETIME(id2)) =>
720 mtwt::resolve(id1) == mtwt::resolve(id2),
// Returns an identifier with the same name as `id` and a syntax context
// obtained by applying mark `m` to the context of `id` via `mtwt::new_mark`.
732 fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident {
733 ast::Ident{name:id.name,ctxt:mtwt::new_mark(m,id.ctxt)}
// Checks `mtwt_token_eq` on identical tokens and on an identifier compared
// with a marked copy of itself.
736 #[test] fn mtwt_token_eq_test() {
// Two references to the `GT` token compare equal. (`&GT` had been mangled
// into `>` by HTML-entity decoding; `mtwt_token_eq` takes `&Token` arguments.)
737 assert!(mtwt_token_eq(&GT,&GT));
738 let a = str_to_ident("bac");
739 let a1 = mark_ident(a,92);
// `a1` is `a` with mark 92 applied; the two are expected to compare equal even
// though their "is_mod_name" flags differ.
740 assert!(mtwt_token_eq(&IDENT(a,true),&IDENT(a1,false)));