1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use ast::{P, Name, Mrk};
15 use util::interner::StrInterner;
18 use extra::serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::path::BytesContainer;
25 #[allow(non_camel_case_types)]
26 #[deriving(Clone, Encodable, Decodable, Eq, IterBytes)]
40 #[allow(non_camel_case_types)]
41 #[deriving(Clone, Encodable, Decodable, Eq, IterBytes)]
43 /* Expression-operator symbols. */
58 /* Structural symbols */
82 LIT_INT(i64, ast::IntTy),
83 LIT_UINT(u64, ast::UintTy),
84 LIT_INT_UNSUFFIXED(i64),
85 LIT_FLOAT(ast::Ident, ast::FloatTy),
86 LIT_FLOAT_UNSUFFIXED(ast::Ident),
88 LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
91 // an identifier contains an "is_mod_name" boolean,
92 // indicating whether :: follows this token with no
93 // whitespace in between.
94 IDENT(ast::Ident, bool),
98 /* For interpolation */
99 INTERPOLATED(Nonterminal),
101 DOC_COMMENT(ast::Ident),
105 #[deriving(Clone, Encodable, Decodable, Eq, IterBytes)]
106 /// For interpolation during macro expansion.
107 pub enum Nonterminal {
109 NtBlock(P<ast::Block>),
114 NtIdent(~ast::Ident, bool),
115 NtAttr(@ast::Attribute), // #[foo]
117 NtTT( @ast::TokenTree), // needs @ed to break a circularity
118 NtMatchers(~[ast::Matcher])
121 pub fn binop_to_str(o: BinOp) -> ~str {
136 pub fn to_str(input: @IdentInterner, t: &Token) -> ~str {
149 BINOP(op) => binop_to_str(op),
150 BINOPEQ(op) => binop_to_str(op) + "=",
152 /* Structural symbols */
177 char::from_u32(c).unwrap().escape_default(|c| {
184 i.to_str() + ast_util::int_ty_to_str(t)
187 u.to_str() + ast_util::uint_ty_to_str(t)
189 LIT_INT_UNSUFFIXED(i) => { i.to_str() }
190 LIT_FLOAT(ref s, t) => {
191 let body_string = get_ident(s.name);
192 let mut body = body_string.get().to_str();
193 if body.ends_with(".") {
194 body.push_char('0'); // `10.f` is not a float literal
196 body + ast_util::float_ty_to_str(t)
198 LIT_FLOAT_UNSUFFIXED(ref s) => {
199 let body_string = get_ident(s.name);
200 let mut body = body_string.get().to_owned();
201 if body.ends_with(".") {
202 body.push_char('0'); // `10.f` is not a float literal
207 let literal_string = get_ident(s.name);
208 format!("\"{}\"", literal_string.get().escape_default())
210 LIT_STR_RAW(ref s, n) => {
211 let literal_string = get_ident(s.name);
212 format!("r{delim}\"{string}\"{delim}",
213 delim="#".repeat(n), string=literal_string.get())
216 /* Name components */
217 IDENT(s, _) => input.get(s.name).to_owned(),
218 LIFETIME(s) => format!("'{}", input.get(s.name)),
222 DOC_COMMENT(ref s) => {
223 let comment_string = get_ident(s.name);
224 comment_string.get().to_str()
227 INTERPOLATED(ref nt) => {
229 &NtExpr(e) => ::print::pprust::expr_to_str(e, input),
230 &NtAttr(e) => ::print::pprust::attribute_to_str(e, input),
232 ~"an interpolated " +
234 NtItem(..) => ~"item",
235 NtBlock(..) => ~"block",
236 NtStmt(..) => ~"statement",
237 NtPat(..) => ~"pattern",
238 NtAttr(..) => fail!("should have been handled"),
239 NtExpr(..) => fail!("should have been handled above"),
241 NtIdent(..) => ~"identifier",
242 NtPath(..) => ~"path",
244 NtMatchers(..) => ~"matcher sequence"
/// Reports whether `t` is a token that can begin an expression.
///
/// Among the accepted tokens are the integer, float and raw-string literal
/// variants, `BINOP(MINUS)`, the `BINOP(OR)` / `OROR` tokens (which open a
/// lambda), and interpolated `NtExpr`, `NtIdent`, `NtBlock` and `NtPath`
/// nonterminals.
252 pub fn can_begin_expr(t: &Token) -> bool {
261 LIT_INT(_, _) => true,
262 LIT_UINT(_, _) => true,
263 LIT_INT_UNSUFFIXED(_) => true,
264 LIT_FLOAT(_, _) => true,
265 LIT_FLOAT_UNSUFFIXED(_) => true,
267 LIT_STR_RAW(_, _) => true,
271 BINOP(MINUS) => true,
274 BINOP(OR) => true, // in lambda syntax
275 OROR => true, // in lambda syntax
277 INTERPOLATED(NtExpr(..))
278 | INTERPOLATED(NtIdent(..))
279 | INTERPOLATED(NtBlock(..))
280 | INTERPOLATED(NtPath(..)) => true,
285 /// Returns the opposite delimiter token for `t`: an opening delimiter maps to
/// its closing counterpart and vice versa (e.g. `LBRACKET` <-> `RBRACKET`).
286 pub fn flip_delimiter(t: &token::Token) -> token::Token {
290 LBRACKET => RBRACKET,
293 RBRACKET => LBRACKET,
/// Reports whether `t` is a literal token.
///
/// The accepted variants include the suffixed and unsuffixed integer and
/// float literals and raw string literals.
300 pub fn is_lit(t: &Token) -> bool {
303 LIT_INT(_, _) => true,
304 LIT_UINT(_, _) => true,
305 LIT_INT_UNSUFFIXED(_) => true,
306 LIT_FLOAT(_, _) => true,
307 LIT_FLOAT_UNSUFFIXED(_) => true,
309 LIT_STR_RAW(_, _) => true,
/// Reports whether `t` is an `IDENT` token, regardless of its `is_mod_name`
/// flag (i.e. whether or not `::` directly follows it).
314 pub fn is_ident(t: &Token) -> bool {
315 match *t { IDENT(_, _) => true, _ => false }
/// Reports whether `t` is either an `IDENT` token (with any `is_mod_name`
/// flag) or an interpolated path (`INTERPOLATED(NtPath(..))`).
318 pub fn is_ident_or_path(t: &Token) -> bool {
320 IDENT(_, _) | INTERPOLATED(NtPath(..)) => true,
/// Reports whether `t` is a "plain" identifier: an `IDENT` token whose
/// `is_mod_name` flag is `false`, meaning it is not directly followed by `::`.
325 pub fn is_plain_ident(t: &Token) -> bool {
326 match *t { IDENT(_, false) => true, _ => false }
/// Reports whether `t` is one of the "bar" tokens, `BINOP(OR)` or `OROR`
/// (the same two tokens `can_begin_expr` accepts for lambda syntax).
329 pub fn is_bar(t: &Token) -> bool {
330 match *t { BINOP(OR) | OROR => true, _ => false }
333 // Get the first "argument"
335 ( $first:expr, $( $remainder:expr, )* ) => ( $first )
338 // Get the last "argument" (has to be done recursively to avoid phoney local ambiguity error)
340 ( $first:expr, $( $remainder:expr, )+ ) => ( last!( $( $remainder, )+ ) );
341 ( $first:expr, ) => ( $first )
344 // In this macro, there is the requirement that the name (the number) must be monotonically
345 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
346 // except starting from the next number instead of zero, and with the additional exception that
347 // special identifiers are *also* allowed (they are deduplicated in the important place, the
348 // interner), an exception which is demonstrated by "static" and "self".
349 macro_rules! declare_special_idents_and_keywords {(
350 // So now, in these rules, why is each definition parenthesised?
351 // Answer: otherwise we get a spurious local ambiguity bug on the "}"
352 pub mod special_idents {
353 $( ($si_name:expr, $si_static:ident, $si_str:expr); )*
358 $( ($sk_name:expr, $sk_variant:ident, $sk_str:expr); )*
360 $( ($rk_name:expr, $rk_variant:ident, $rk_str:expr); )*
363 static STRICT_KEYWORD_START: Name = first!($( $sk_name, )*);
364 static STRICT_KEYWORD_FINAL: Name = last!($( $sk_name, )*);
365 static RESERVED_KEYWORD_START: Name = first!($( $rk_name, )*);
366 static RESERVED_KEYWORD_FINAL: Name = last!($( $rk_name, )*);
368 pub mod special_idents {
370 $( pub static $si_static: Ident = Ident { name: $si_name, ctxt: 0 }; )*
374 * All the valid words that have meaning in the Rust language.
376 * Rust keywords are either 'strict' or 'reserved'. Strict keywords may not
377 * appear as identifiers at all. Reserved keywords are not used anywhere in
378 * the language and may not appear as identifiers.
389 pub fn to_ident(&self) -> Ident {
391 $( $sk_variant => Ident { name: $sk_name, ctxt: 0 }, )*
392 $( $rk_variant => Ident { name: $rk_name, ctxt: 0 }, )*
398 fn mk_fresh_ident_interner() -> @IdentInterner {
399 // The indices here must correspond to the numbers in
400 // special_idents, in Keyword to_ident(), and in static
408 @interner::StrInterner::prefill(init_vec)
412 // If the special idents get renumbered, remember to modify these two as appropriate
413 static SELF_KEYWORD_NAME: Name = 3;
414 static STATIC_KEYWORD_NAME: Name = 10;
416 declare_special_idents_and_keywords! {
417 pub mod special_idents {
418 // These ones are statics
421 (1, invalid, ""); // ''
422 (2, clownshoes_extensions, "__extensions__");
424 (super::SELF_KEYWORD_NAME, self_, "self"); // 'self'
428 (5, matchers, "matchers");
430 // outside of libsyntax
432 (7, clownshoe_abi, "__rust_abi");
434 (9, opaque, "<opaque>");
435 (super::STATIC_KEYWORD_NAME, statik, "static");
436 (11, clownshoes_foreign_mod, "__foreign_mod__");
437 (12, unnamed_field, "<unnamed_field>");
438 (13, type_self, "Self"); // `Self`
442 // These ones are variants of the Keyword enum
446 (15, Break, "break");
447 (16, Const, "const");
450 (19, Extern, "extern");
451 (20, False, "false");
458 (27, __LogLevel, "__log_level");
460 (29, Match, "match");
467 (36, Return, "return");
468 // Static and Self are also special idents (prefill de-dupes)
469 (super::STATIC_KEYWORD_NAME, Static, "static");
470 (super::SELF_KEYWORD_NAME, Self, "self");
471 (37, Struct, "struct");
472 (38, Super, "super");
474 (40, Trait, "trait");
476 (42, Unsafe, "unsafe");
478 (44, While, "while");
479 (45, Continue, "continue");
484 (48, Alignof, "alignof");
486 (50, Offsetof, "offsetof");
488 (52, Sizeof, "sizeof");
489 (53, Typeof, "typeof");
490 (54, Unsized, "unsized");
491 (55, Yield, "yield");
496 * Maps a token to a record specifying the corresponding binary
499 pub fn token_to_binop(tok: &Token) -> Option<ast::BinOp> {
501 BINOP(STAR) => Some(ast::BiMul),
502 BINOP(SLASH) => Some(ast::BiDiv),
503 BINOP(PERCENT) => Some(ast::BiRem),
504 BINOP(PLUS) => Some(ast::BiAdd),
505 BINOP(MINUS) => Some(ast::BiSub),
506 BINOP(SHL) => Some(ast::BiShl),
507 BINOP(SHR) => Some(ast::BiShr),
508 BINOP(AND) => Some(ast::BiBitAnd),
509 BINOP(CARET) => Some(ast::BiBitXor),
510 BINOP(OR) => Some(ast::BiBitOr),
511 LT => Some(ast::BiLt),
512 LE => Some(ast::BiLe),
513 GE => Some(ast::BiGe),
514 GT => Some(ast::BiGt),
515 EQEQ => Some(ast::BiEq),
516 NE => Some(ast::BiNe),
517 ANDAND => Some(ast::BiAnd),
518 OROR => Some(ast::BiOr),
523 // looks like we can get rid of this completely...
524 pub type IdentInterner = StrInterner;
526 // if an interner exists in TLS, return it. Otherwise, prepare a
528 pub fn get_ident_interner() -> @IdentInterner {
529 local_data_key!(key: @@::parse::token::IdentInterner)
530 match local_data::get(key, |k| k.map(|k| *k)) {
531 Some(interner) => *interner,
533 let interner = mk_fresh_ident_interner();
534 local_data::set(key, @interner);
540 /// Represents a string stored in the task-local interner. Because the
541 /// interner lives for the life of the task, this can be safely treated as an
542 /// immortal string, as long as it never crosses between tasks.
544 /// XXX(pcwalton): You must be careful about what you do in the destructors of
545 /// objects stored in TLS, because they may run after the interner is
546 /// destroyed. In particular, they must not access string contents. This can
547 /// be fixed in the future by just leaking all strings until task death
550 #[deriving(Clone, Eq, IterBytes, Ord, TotalEq, TotalOrd)]
551 pub struct InternedString {
556 impl Drop for InternedString {
558 // No-op just to make this not implicitly copyable.
562 impl InternedString {
564 pub fn new(string: &'static str) -> InternedString {
566 string: string.to_managed(),
570 // NB: Do not make this public. We are trying to remove `@str`.
572 fn new_from_at_str(string: @str) -> InternedString {
579 pub fn get<'a>(&'a self) -> &'a str {
580 self.string.as_slice()
584 impl BytesContainer for InternedString {
585 fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
586 // XXX(pcwalton): This is a workaround for the incorrect signature of
587 // `BytesContainer`, which is itself a workaround for the lack of DST.
589 let this = self.get();
590 cast::transmute(this.container_as_bytes())
595 impl fmt::Default for InternedString {
596 fn fmt(obj: &InternedString, f: &mut fmt::Formatter) {
597 write!(f.buf, "{}", obj.string);
601 impl<'a> Equiv<&'a str> for InternedString {
602 fn equiv(&self, other: & &'a str) -> bool {
603 (*other) == self.string.as_slice()
607 impl<D:Decoder> Decodable<D> for InternedString {
608 fn decode(d: &mut D) -> InternedString {
609 let interner = get_ident_interner();
610 get_ident(interner.intern(d.read_str()))
614 impl<E:Encoder> Encodable<E> for InternedString {
615 fn encode(&self, e: &mut E) {
616 e.emit_str(self.string)
620 /// Returns the string contents of an identifier, using the task-local
623 pub fn get_ident(idx: Name) -> InternedString {
624 let interner = get_ident_interner();
625 InternedString::new_from_at_str(interner.get(idx))
628 /// Interns and returns the string contents of an identifier, using the
629 /// task-local interner.
631 pub fn intern_and_get_ident(s: &str) -> InternedString {
635 /* for when we don't care about the contents; doesn't interact with TLD or
637 pub fn mk_fake_ident_interner() -> @IdentInterner {
638 @interner::StrInterner::new()
641 // maps a string to its interned representation
643 pub fn intern(str : &str) -> Name {
644 let interner = get_ident_interner();
648 // gensyms a new uint, using the current interner
649 pub fn gensym(str : &str) -> Name {
650 let interner = get_ident_interner();
654 // map an interned representation back to a string
655 pub fn interner_get(name : Name) -> @str {
656 get_ident_interner().get(name)
659 // maps a string to an identifier with an empty syntax context
660 pub fn str_to_ident(str : &str) -> ast::Ident {
661 ast::Ident::new(intern(str))
664 // maps a string to a gensym'ed identifier
665 pub fn gensym_ident(str : &str) -> ast::Ident {
666 ast::Ident::new(gensym(str))
669 // create a fresh name that maps to the same string as the old one.
670 // note that this guarantees that str_ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
671 // that is, that the new name and the old one are connected to ptr_eq strings.
672 pub fn fresh_name(src : &ast::Ident) -> Name {
673 let interner = get_ident_interner();
674 interner.gensym_copy(src.name)
675 // following: debug version. Could work in final except that it's incompatible with
676 // good error messages and uses of struct names in ambiguous could-be-binding
677 // locations. Also definitely destroys the guarantee given above about ptr_eq.
678 /*let num = rand::rng().gen_uint_range(0,0xffff);
679 gensym(format!("{}_{}",ident_to_str(src),num))*/
682 // it looks like there oughta be a str_ptr_eq fn, but no one bothered to implement it?
684 // determine whether two @str values are pointer-equal
685 pub fn str_ptr_eq(a : @str, b : @str) -> bool {
687 let p : uint = cast::transmute(a);
688 let q : uint = cast::transmute(b);
690 // got to transmute them back, to make sure the ref count is correct:
691 let _junk1 : @str = cast::transmute(p);
692 let _junk2 : @str = cast::transmute(q);
697 // return true when two identifiers refer (through the intern table) to the same ptr_eq
698 // string. This is used to compare identifiers in places where hygienic comparison is
699 // not wanted (i.e. not lexical vars).
700 pub fn ident_spelling_eq(a : &ast::Ident, b : &ast::Ident) -> bool {
701 str_ptr_eq(interner_get(a.name),interner_get(b.name))
704 // create a fresh mark.
705 pub fn fresh_mark() -> Mrk {
709 // See the macro above about the types of keywords
/// Reports whether `tok` is the keyword `kw`.
///
/// A token matches when it is an `IDENT` whose `is_mod_name` flag is `false`
/// and whose name equals the name of `kw.to_ident()`.
711 pub fn is_keyword(kw: keywords::Keyword, tok: &Token) -> bool {
713 token::IDENT(sid, false) => { kw.to_ident().name == sid.name }
/// Reports whether `tok` is any keyword, strict or reserved.
///
/// Only `IDENT` tokens with a `false` `is_mod_name` flag are considered. The
/// name must be `SELF_KEYWORD_NAME`, `STATIC_KEYWORD_NAME`, or fall in the
/// range `STRICT_KEYWORD_START .. RESERVED_KEYWORD_FINAL`. `self` and `static`
/// are listed separately because they are numbered among the special idents
/// rather than inside the keyword range.
718 pub fn is_any_keyword(tok: &Token) -> bool {
720 token::IDENT(sid, false) => match sid.name {
721 SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME |
722 STRICT_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true,
/// Reports whether `tok` is a strict keyword.
///
/// Only `IDENT` tokens with a `false` `is_mod_name` flag are considered. The
/// name must be `SELF_KEYWORD_NAME`, `STATIC_KEYWORD_NAME`, or fall in the
/// range `STRICT_KEYWORD_START .. STRICT_KEYWORD_FINAL`. `self` and `static`
/// are listed separately because they are numbered among the special idents
/// rather than inside the strict-keyword range.
729 pub fn is_strict_keyword(tok: &Token) -> bool {
731 token::IDENT(sid, false) => match sid.name {
732 SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME |
733 STRICT_KEYWORD_START .. STRICT_KEYWORD_FINAL => true,
/// Reports whether `tok` is a reserved keyword: an `IDENT` token with a
/// `false` `is_mod_name` flag whose name falls in the range
/// `RESERVED_KEYWORD_START .. RESERVED_KEYWORD_FINAL`.
740 pub fn is_reserved_keyword(tok: &Token) -> bool {
742 token::IDENT(sid, false) => match sid.name {
743 RESERVED_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true,
/// Compares two tokens for equality, resolving identifiers first.
///
/// Two `IDENT` tokens are equal when `ast_util::mtwt_resolve` gives the same
/// result for both identifiers; their `is_mod_name` flags are ignored.
750 pub fn mtwt_token_eq(t1 : &Token, t2 : &Token) -> bool {
752 (&IDENT(id1,_),&IDENT(id2,_)) =>
753 ast_util::mtwt_resolve(id1) == ast_util::mtwt_resolve(id2),
/// Builds an identifier with the same name as `id` whose syntax context is
/// `id.ctxt` extended with the mark `m` (via `ast_util::new_mark`).
765 fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident {
766 ast::Ident{name:id.name,ctxt:ast_util::new_mark(m,id.ctxt)}
// Checks that `mtwt_token_eq` treats a token as equal to itself, and that an
// identifier and its marked copy compare equal even though their
// `is_mod_name` flags differ.
769 #[test] fn mtwt_token_eq_test() {
770 assert!(mtwt_token_eq(&GT,&GT));
771 let a = str_to_ident("bac");
772 let a1 = mark_ident(a,92);
773 assert!(mtwt_token_eq(&IDENT(a,true),&IDENT(a1,false)));