1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 use util::interner::Interner;
18 use core::hashmap::HashSet;
41 /* Expression-operator symbols. */
56 /* Structural symbols */
78 LIT_INT(i64, ast::int_ty),
79 LIT_UINT(u64, ast::uint_ty),
80 LIT_INT_UNSUFFIXED(i64),
81 LIT_FLOAT(ast::ident, ast::float_ty),
82 LIT_FLOAT_UNSUFFIXED(ast::ident),
86 // an identifier contains an "is_mod_name" boolean,
87 // indicating whether :: follows this token with no
88 // whitespace in between.
89 IDENT(ast::ident, bool),
93 /* For interpolation */
94 INTERPOLATED(nonterminal),
96 DOC_COMMENT(ast::ident),
103 /// For interpolation during macro expansion.
104 pub enum nonterminal {
111 nt_ident(ast::ident, bool),
113 nt_tt( @ast::token_tree), //needs @ed to break a circularity
114 nt_matchers(~[ast::matcher])
117 pub fn binop_to_str(o: binop) -> ~str {
132 pub fn to_str(in: @ident_interner, t: &Token) -> ~str {
145 BINOP(op) => binop_to_str(op),
146 BINOPEQ(op) => binop_to_str(op) + ~"=",
148 /* Structural symbols */
170 LIT_INT(c, ast::ty_char) => {
171 ~"'" + char::escape_default(c as char) + ~"'"
174 i.to_str() + ast_util::int_ty_to_str(t)
177 u.to_str() + ast_util::uint_ty_to_str(t)
179 LIT_INT_UNSUFFIXED(i) => { i.to_str() }
181 let mut body = copy *in.get(s);
182 if body.ends_with(~".") {
183 body = body + ~"0"; // `10.f` is not a float literal
185 body + ast_util::float_ty_to_str(t)
187 LIT_FLOAT_UNSUFFIXED(s) => {
188 let mut body = copy *in.get(s);
189 if body.ends_with(~".") {
190 body = body + ~"0"; // `10.f` is not a float literal
194 LIT_STR(s) => { ~"\"" + str::escape_default(*in.get(s)) + ~"\"" }
196 /* Name components */
197 IDENT(s, _) => copy *in.get(s),
198 LIFETIME(s) => fmt!("'%s", *in.get(s)),
202 DOC_COMMENT(s) => copy *in.get(s),
204 INTERPOLATED(ref nt) => {
206 &nt_expr(e) => ::print::pprust::expr_to_str(e, in),
208 ~"an interpolated " +
210 nt_item(*) => ~"item",
211 nt_block(*) => ~"block",
212 nt_stmt(*) => ~"statement",
213 nt_pat(*) => ~"pattern",
214 nt_expr(*) => fail!(~"should have been handled above"),
216 nt_ident(*) => ~"identifier",
217 nt_path(*) => ~"path",
219 nt_matchers(*) => ~"matcher sequence"
227 pub fn can_begin_expr(t: &Token) -> bool {
235 LIT_INT(_, _) => true,
236 LIT_UINT(_, _) => true,
237 LIT_INT_UNSUFFIXED(_) => true,
238 LIT_FLOAT(_, _) => true,
239 LIT_FLOAT_UNSUFFIXED(_) => true,
244 BINOP(MINUS) => true,
247 BINOP(OR) => true, // in lambda syntax
248 OROR => true, // in lambda syntax
250 INTERPOLATED(nt_expr(*))
251 | INTERPOLATED(nt_ident(*))
252 | INTERPOLATED(nt_block(*))
253 | INTERPOLATED(nt_path(*)) => true,
258 /// Returns the opposite (matching) delimiter for a delimiter token.
259 pub fn flip_delimiter(t: &token::Token) -> token::Token {
263 LBRACKET => RBRACKET,
266 RBRACKET => LBRACKET,
273 pub fn is_lit(t: &Token) -> bool {
275 LIT_INT(_, _) => true,
276 LIT_UINT(_, _) => true,
277 LIT_INT_UNSUFFIXED(_) => true,
278 LIT_FLOAT(_, _) => true,
279 LIT_FLOAT_UNSUFFIXED(_) => true,
/// Returns `true` when `t` is an `IDENT` token, regardless of its name or
/// of its boolean flag; every other token yields `false`.
285 pub fn is_ident(t: &Token) -> bool {
286 match *t { IDENT(_, _) => true, _ => false }
/// Returns `true` for an `IDENT` token or for an interpolated path
/// (`INTERPOLATED(nt_path(*))`).
289 pub fn is_ident_or_path(t: &Token) -> bool {
291 IDENT(_, _) | INTERPOLATED(nt_path(*)) => true,
/// Returns `true` only for an `IDENT` whose boolean field (the
/// "is_mod_name" flag, i.e. whether `::` directly follows the token) is
/// `false`; any other token yields `false`.
296 pub fn is_plain_ident(t: &Token) -> bool {
297 match *t { IDENT(_, false) => true, _ => false }
/// Returns `true` for `BINOP(OR)` or `OROR`, and `false` for every other
/// token.
300 pub fn is_bar(t: &Token) -> bool {
301 match *t { BINOP(OR) | OROR => true, _ => false }
// Identifiers with fixed, pre-assigned `repr` indices (0 through 36 in the
// statics below); every one of them uses `ctxt: 0`. The string table that
// `mk_fresh_ident_interner` prefills is annotated with the same indices, so
// the two lists have to be kept in step.
305 pub mod special_idents {
308 pub static underscore : ident = ident { repr: 0u, ctxt: 0};
309 pub static anon : ident = ident { repr: 1u, ctxt: 0};
310 pub static dtor : ident = ident { repr: 2u, ctxt: 0}; // 'drop', but that's
312 pub static invalid : ident = ident { repr: 3u, ctxt: 0}; // ''
313 pub static unary : ident = ident { repr: 4u, ctxt: 0};
314 pub static not_fn : ident = ident { repr: 5u, ctxt: 0};
315 pub static idx_fn : ident = ident { repr: 6u, ctxt: 0};
316 pub static unary_minus_fn : ident = ident { repr: 7u, ctxt: 0};
// index 8 is prefilled as "__extensions__" in mk_fresh_ident_interner
317 pub static clownshoes_extensions : ident = ident { repr: 8u, ctxt: 0};
319 pub static self_ : ident = ident { repr: 9u, ctxt: 0}; // 'self'
321 /* for matcher NTs */
322 pub static item : ident = ident { repr: 10u, ctxt: 0};
323 pub static block : ident = ident { repr: 11u, ctxt: 0};
324 pub static stmt : ident = ident { repr: 12u, ctxt: 0};
325 pub static pat : ident = ident { repr: 13u, ctxt: 0};
326 pub static expr : ident = ident { repr: 14u, ctxt: 0};
327 pub static ty : ident = ident { repr: 15u, ctxt: 0};
328 pub static ident : ident = ident { repr: 16u, ctxt: 0};
329 pub static path : ident = ident { repr: 17u, ctxt: 0};
330 pub static tt : ident = ident { repr: 18u, ctxt: 0};
331 pub static matchers : ident = ident { repr: 19u, ctxt: 0};
333 pub static str : ident = ident { repr: 20u, ctxt: 0}; // for the type
335 /* outside of libsyntax */
336 pub static ty_visitor : ident = ident { repr: 21u, ctxt: 0};
337 pub static arg : ident = ident { repr: 22u, ctxt: 0};
338 pub static descrim : ident = ident { repr: 23u, ctxt: 0};
// index 24 is prefilled as "__rust_abi" in mk_fresh_ident_interner
339 pub static clownshoe_abi : ident = ident { repr: 24u, ctxt: 0};
// index 25 is prefilled as "__rust_stack_shim" in mk_fresh_ident_interner
340 pub static clownshoe_stack_shim : ident = ident { repr: 25u, ctxt: 0};
341 pub static tydesc : ident = ident { repr: 26u, ctxt: 0};
342 pub static literally_dtor : ident = ident { repr: 27u, ctxt: 0};
343 pub static main : ident = ident { repr: 28u, ctxt: 0};
344 pub static opaque : ident = ident { repr: 29u, ctxt: 0};
345 pub static blk : ident = ident { repr: 30u, ctxt: 0};
346 pub static static : ident = ident { repr: 31u, ctxt: 0};
347 pub static intrinsic : ident = ident { repr: 32u, ctxt: 0};
// index 33 is prefilled as "__foreign_mod__" in mk_fresh_ident_interner
348 pub static clownshoes_foreign_mod: ident = ident { repr: 33u, ctxt: 0};
349 pub static unnamed_field: ident = ident { repr: 34u, ctxt: 0};
350 pub static c_abi: ident = ident { repr: 35u, ctxt: 0};
351 pub static type_self: ident = ident { repr: 36u, ctxt: 0}; // `Self`
/// Newtype around a borrowed `&'self str`. Its `Equiv<@~str>` impl lets a
/// plain string slice be compared against `@~str` values.
354 pub struct StringRef<'self>(&'self str);
// Equivalence between a `StringRef` and an `@~str`.
356 impl<'self> Equiv<@~str> for StringRef<'self> {
// Both sides are dereferenced twice and handed to `str::eq_slice`.
358 fn equiv(&self, other: &@~str) -> bool { str::eq_slice(**self, **other) }
// `IterBytes` for `StringRef`: forwards to the value obtained by
// dereferencing `self` twice.
361 impl<'self> to_bytes::IterBytes for StringRef<'self> {
362 fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) {
// `lsb0` and `f` are passed through unchanged.
363 (**self).iter_bytes(lsb0, f);
368 * Maps a token to a record specifying the corresponding binary
371 pub fn token_to_binop(tok: Token) -> Option<ast::binop> {
373 BINOP(STAR) => Some(ast::mul),
374 BINOP(SLASH) => Some(ast::quot),
375 BINOP(PERCENT) => Some(ast::rem),
376 BINOP(PLUS) => Some(ast::add),
377 BINOP(MINUS) => Some(ast::subtract),
378 BINOP(SHL) => Some(ast::shl),
379 BINOP(SHR) => Some(ast::shr),
380 BINOP(AND) => Some(ast::bitand),
381 BINOP(CARET) => Some(ast::bitxor),
382 BINOP(OR) => Some(ast::bitor),
387 EQEQ => Some(ast::eq),
389 ANDAND => Some(ast::and),
390 OROR => Some(ast::or),
/// Identifier interner: wraps an `Interner<@~str>`; the methods in its
/// `impl` translate between interned strings and `ast::ident` values.
395 pub struct ident_interner {
396 priv interner: Interner<@~str>,
399 pub impl ident_interner {
// Interns `val` and wraps the resulting index as an `ast::ident` whose
// `ctxt` is 0.
400 fn intern(&self, val: @~str) -> ast::ident {
401 ast::ident { repr: self.interner.intern(val), ctxt: 0 }
// Same wrapping as `intern`, but the index comes from the underlying
// interner's `gensym`.
403 fn gensym(&self, val: @~str) -> ast::ident {
404 ast::ident { repr: self.interner.gensym(val), ctxt: 0 }
// Looks up the string stored under `idx.repr`; `idx.ctxt` is not consulted.
406 fn get(&self, idx: ast::ident) -> @~str {
407 self.interner.get(idx.repr)
409 fn len(&self) -> uint {
412 fn find_equiv<Q:Hash + IterBytes + Equiv<@~str>>(&self, val: &Q)
413 -> Option<ast::ident> {
414 match self.interner.find_equiv(val) {
// A hit's index is wrapped as an `ast::ident` with `ctxt: 0`, as in `intern`.
415 Some(v) => Some(ast::ident { repr: v, ctxt: 0 }),
421 // return a fresh interner, preloaded with special identifiers.
422 // EFFECT: stores this interner in TLS
423 pub fn mk_fresh_ident_interner() -> @ident_interner {
424 // the indices here must correspond to the numbers in
435 @~"__extensions__", // 8
451 @~"__rust_abi", // 24
452 @~"__rust_stack_shim", // 25
460 @~"__foreign_mod__", // 33
466 let rv = @ident_interner {
467 interner: interner::Interner::prefill(init_vec)
470 task::local_data::local_data_set(interner_key!(), @rv);
475 // if an interner exists in TLS, return it. Otherwise, prepare a
477 pub fn mk_ident_interner() -> @ident_interner {
479 match task::local_data::local_data_get(interner_key!()) {
480 Some(interner) => *interner,
482 mk_fresh_ident_interner()
488 /* for when we don't care about the contents; doesn't interact with TLD or
490 pub fn mk_fake_ident_interner() -> @ident_interner {
// Wraps a brand-new `Interner::new()`; this expression neither prefills it
// nor stores it in task-local data.
491 @ident_interner { interner: interner::Interner::new() }
495 * All the valid words that have meaning in the Rust language.
497 * Rust keywords are either 'temporary', 'strict' or 'reserved'. Temporary
498 * keywords are contextual and may be used as identifiers anywhere. They are
499 * expected to disappear from the grammar soon. Strict keywords may not
500 * appear as identifiers at all. Reserved keywords are not used anywhere in
501 * the language and may not appear as identifiers.
503 pub fn keyword_table() -> HashSet<~str> {
504 let mut keywords = HashSet::new();
// Build the three per-category tables (temporary, strict, reserved).
505 let mut tmp = temporary_keyword_table();
506 let mut strict = strict_keyword_table();
507 let mut reserved = reserved_keyword_table();
// Each word yielded by `consume` is inserted into the combined set.
509 do tmp.consume |word| { keywords.insert(word); }
510 do strict.consume |word| { keywords.insert(word); }
511 do reserved.consume |word| { keywords.insert(word); }
515 /// Keywords that may be used as identifiers
516 pub fn temporary_keyword_table() -> HashSet<~str> {
517 let mut words = HashSet::new();
521 do vec::consume(keys) |_, s| {
527 /// Full keywords. May not appear anywhere else.
528 pub fn strict_keyword_table() -> HashSet<~str> {
529 let mut words = HashSet::new();
535 ~"else", ~"enum", ~"extern",
536 ~"false", ~"fn", ~"for",
538 ~"let", ~"__log", ~"loop",
539 ~"match", ~"mod", ~"mut",
541 ~"priv", ~"pub", ~"pure",
544 ~"true", ~"trait", ~"type",
548 do vec::consume(keys) |_, w| {
554 pub fn reserved_keyword_table() -> HashSet<~str> {
555 let mut words = HashSet::new();
559 do vec::consume(keys) |_, s| {
568 // indent-tabs-mode: nil
570 // buffer-file-coding-system: utf-8-unix