1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use serialize::{Decodable, Decoder, Encodable, Encoder};
16 use std::cell::RefCell;
17 use std::collections::HashMap;
20 /// A symbol is an interned or gensymed string.
21 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
22 pub struct Symbol(u32);
24 // The interner is thread-local, so `Symbol` shouldn't move between threads.
25 impl !Send for Symbol { }
28 /// Maps a string to its interned representation.
29 pub fn intern(string: &str) -> Self {
30 with_interner(|interner| interner.intern(string))
33 /// Gensyms a new symbol for the string, using the current interner.
34 pub fn gensym(string: &str) -> Self {
35 with_interner(|interner| interner.gensym(string))
38 pub fn as_str(self) -> InternedString {
39 with_interner(|interner| unsafe {
41 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
46 pub fn as_u32(self) -> u32 {
51 impl fmt::Debug for Symbol {
52 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53 write!(f, "{}({})", self, self.0)
57 impl fmt::Display for Symbol {
58 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
59 fmt::Display::fmt(&self.as_str(), f)
63 impl Encodable for Symbol {
64 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
65 s.emit_str(&self.as_str())
69 impl Decodable for Symbol {
70 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
71 Ok(Symbol::intern(&d.read_str()?))
75 impl<'a> PartialEq<&'a str> for Symbol {
76 fn eq(&self, other: &&str) -> bool {
77 *self.as_str() == **other
83 names: HashMap<Box<str>, Symbol>,
84 strings: Vec<Box<str>>,
88 pub fn new() -> Self {
92 fn prefill(init: &[&str]) -> Self {
93 let mut this = Interner::new();
100 pub fn intern(&mut self, string: &str) -> Symbol {
101 if let Some(&name) = self.names.get(string) {
105 let name = Symbol(self.strings.len() as u32);
106 let string = string.to_string().into_boxed_str();
107 self.strings.push(string.clone());
108 self.names.insert(string, name);
112 fn gensym(&mut self, string: &str) -> Symbol {
113 let gensym = Symbol(self.strings.len() as u32);
114 // leave out of `names` to avoid colliding
115 self.strings.push(string.to_string().into_boxed_str());
119 pub fn get(&self, name: Symbol) -> &str {
120 &self.strings[name.0 as usize]
124 // In this macro, there is the requirement that the name (the number) must be monotonically
125 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
126 // except starting from the next number instead of zero.
127 macro_rules! declare_keywords {(
128 $( ($index: expr, $konst: ident, $string: expr) )*
132 #[derive(Clone, Copy, PartialEq, Eq)]
137 #[inline] pub fn ident(self) -> ast::Ident { self.ident }
138 #[inline] pub fn name(self) -> ast::Name { self.ident.name }
141 #[allow(non_upper_case_globals)]
142 pub const $konst: Keyword = Keyword {
143 ident: ast::Ident::with_empty_ctxt(ast::Name($index))
150 Interner::prefill(&[$($string,)*])
155 // NB: leaving holes in the ident table is bad! A different ident will get
156 // interned with the id from the hole, but it will be between the min and max
157 // of the reserved words, and thus tagged as "reserved".
158 // After modifying this list adjust `is_strict_keyword`/`is_reserved_keyword`,
159 // though this should rarely be necessary if the keywords are kept in alphabetical order.
161 // Invalid identifier
164 // Strict keywords used in the language.
169 (5, Continue, "continue")
173 (9, Extern, "extern")
188 (24, Return, "return")
189 (25, SelfValue, "self")
190 (26, SelfType, "Self")
191 (27, Static, "static")
192 (28, Struct, "struct")
197 (33, Unsafe, "unsafe")
202 // Keywords reserved for future use.
203 (37, Abstract, "abstract")
204 (38, Alignof, "alignof")
205 (39, Become, "become")
209 (43, Offsetof, "offsetof")
210 (44, Override, "override")
214 (48, Sizeof, "sizeof")
215 (49, Typeof, "typeof")
216 (50, Unsized, "unsized")
217 (51, Virtual, "virtual")
220 // Weak keywords, have special meaning only in specific contexts.
221 (53, Default, "default")
222 (54, StaticLifetime, "'static")
225 // A virtual keyword that resolves to the crate root when used in a lexical scope.
226 (56, CrateRoot, "{{root}}")
229 // If an interner exists in TLS, return it. Otherwise, prepare a fresh one.
230 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
231 thread_local!(static INTERNER: RefCell<Interner> = {
232 RefCell::new(Interner::fresh())
234 INTERNER.with(|interner| f(&mut *interner.borrow_mut()))
237 /// Represents a string stored in the thread-local interner. Because the
238 /// interner lives for the life of the thread, this can be safely treated as an
239 /// immortal string, as long as it never crosses between threads.
241 /// FIXME(pcwalton): You must be careful about what you do in the destructors
242 /// of objects stored in TLS, because they may run after the interner is
243 /// destroyed. In particular, they must not access string contents. This can
244 /// be fixed in the future by just leaking all strings until thread death
246 #[derive(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)]
247 pub struct InternedString {
248 string: &'static str,
251 impl !Send for InternedString { }
253 impl ::std::ops::Deref for InternedString {
255 fn deref(&self) -> &str { self.string }
258 impl fmt::Debug for InternedString {
259 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
260 fmt::Debug::fmt(self.string, f)
264 impl fmt::Display for InternedString {
265 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
266 fmt::Display::fmt(self.string, f)
270 impl Decodable for InternedString {
271 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
272 Ok(Symbol::intern(&d.read_str()?).as_str())
276 impl Encodable for InternedString {
277 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
278 s.emit_str(self.string)
288 fn interner_tests() {
289 let mut i: Interner = Interner::new();
290 // first one is zero:
291 assert_eq!(i.intern("dog"), Name(0));
292 // re-use gets the same entry:
293 assert_eq!(i.intern ("dog"), Name(0));
294 // different string gets a different #:
295 assert_eq!(i.intern("cat"), Name(1));
296 assert_eq!(i.intern("cat"), Name(1));
297 // dog is still at zero
298 assert_eq!(i.intern("dog"), Name(0));
300 assert_eq!(i.gensym("zebra"), Name(2));
301 // gensym of same string gets new number:
302 assert_eq!(i.gensym("zebra"), Name(3));
303 // gensym of *existing* string gets new number:
304 assert_eq!(i.gensym("dog"), Name(4));