1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use serialize::{Decodable, Decoder, Encodable, Encoder};
16 use std::cell::RefCell;
17 use std::collections::HashMap;
21 /// A symbol is an interned or gensymed string.
///
/// It wraps a `u32`, which `Interner::get` uses as an index into the
/// interner's `strings` table to recover the text. The comparison and hash
/// traits are derived, so they operate on that number rather than on the
/// string contents.
22 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
23 pub struct Symbol(u32);
26 /// Maps a string to its interned representation.
///
/// Delegates to `Interner::intern` on the thread-local interner provided by
/// `with_interner`.
27 pub fn intern(string: &str) -> Self {
28 with_interner(|interner| interner.intern(string))
31 /// Creates a fresh gensym'd symbol for `string`, using the current interner.
///
/// Delegates to `Interner::gensym`, which always allocates a new index, so the
/// result is distinct from any symbol previously returned for the same text.
32 pub fn gensym(string: &str) -> Self {
33 with_interner(|interner| interner.gensym(string))
/// Returns the text of this symbol as an `InternedString`.
///
/// The string handle is fetched from the thread-local interner with
/// `Interner::get`, which indexes its table and therefore panics if this
/// symbol's number is out of range for that interner.
36 pub fn as_str(self) -> InternedString {
37 with_interner(|interner| InternedString { string: interner.get(self) })
/// Returns this symbol as a `u32`.
///
/// NOTE(review): the body is not visible in this view; presumably it returns
/// the wrapped value `self.0` — confirm.
40 pub fn as_u32(self) -> u32 {
// Debug output has the form `text(index)`: `{}` goes through the `Display`
// impl for the text, followed by the raw `u32` in parentheses.
45 impl fmt::Debug for Symbol {
46 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
47 write!(f, "{}({})", self, self.0)
// Displays the symbol's text by looking it up with `as_str` and forwarding
// to the `Display` impl of the resulting `InternedString`.
51 impl fmt::Display for Symbol {
52 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53 fmt::Display::fmt(&self.as_str(), f)
// Serializes the symbol's text, not its number; the matching `Decodable`
// impl re-interns the text. (Presumably because the number is only
// meaningful relative to one interner — confirm.)
57 impl Encodable for Symbol {
58 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
59 s.emit_str(&self.as_str())
// Reads a string from the decoder and interns it in the thread-local
// interner; a decoder error is propagated by `?`.
63 impl Decodable for Symbol {
64 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
65 Ok(Symbol::intern(&d.read_str()?))
// Allows `symbol == "text"`: compares the symbol's string contents
// (obtained via `as_str`) with the given `&str`.
69 impl<'a> PartialEq<&'a str> for Symbol {
70 fn eq(&self, other: &&str) -> bool {
71 *self.as_str() == **other
// Lookup from string contents to the symbol assigned by `intern`.
// `gensym` deliberately does not add entries here.
77 names: HashMap<Rc<str>, Symbol>,
// Text of every symbol (interned and gensymed), indexed by the symbol's
// `u32` value.
78 strings: Vec<Rc<str>>,
/// Creates a new interner.
///
/// NOTE(review): the body is not visible in this view; presumably the result
/// is empty, since `interner_tests` expects the first interned string to
/// receive index 0 — confirm.
82 pub fn new() -> Self {
/// Builds an interner seeded from the strings in `init`.
///
/// NOTE(review): only the first statement of the body is visible in this
/// view; presumably each string is interned in order so that `init[i]` gets
/// symbol `i` — confirm against the full file.
86 fn prefill(init: &[&str]) -> Self {
87 let mut this = Interner::new();
/// Returns the symbol for `string`, assigning a new one if the string has not
/// been interned before.
94 pub fn intern(&mut self, string: &str) -> Symbol {
// Already interned: the existing symbol is bound to `name` (presumably
// returned from this branch — its body is not visible in this view).
95 if let Some(&name) = self.names.get(string) {
// New string: the next free slot in `strings` becomes its symbol.
// NOTE(review): `as u32` truncates silently if the table ever holds more
// than `u32::MAX` entries.
99 let name = Symbol(self.strings.len() as u32);
// One `Rc<str>` allocation is shared by both tables.
100 let string = Rc::__from_str(string);
101 self.strings.push(string.clone());
102 self.names.insert(string, name);
/// Allocates a new symbol whose text is `string` without registering the
/// text in `names`, so later `intern(string)` calls never resolve to it and
/// repeated gensyms of the same text get distinct symbols.
106 fn gensym(&mut self, string: &str) -> Symbol {
// The new symbol is the next free index in `strings`.
// NOTE(review): `as u32` truncates silently past `u32::MAX` entries.
107 let gensym = Symbol(self.strings.len() as u32);
108 // leave out of `names` to avoid colliding
109 self.strings.push(Rc::__from_str(string));
/// Returns a shared handle to the text of `name`.
///
/// The symbol's `u32` is used as an index into `strings`; indexing panics if
/// it is out of bounds. Cloning the `Rc<str>` does not copy the text.
113 pub fn get(&self, name: Symbol) -> Rc<str> {
114 self.strings[name.0 as usize].clone()
118 // In this macro, there is the requirement that the name (the number) must be monotonically
119 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
120 // except starting from the next number instead of zero.
//
// Each entry is a tuple `(index, ConstName, "string")`. For every entry the
// macro emits a `Keyword` constant whose name is hard-coded as
// `ast::Name($index)`, and all the strings are handed, in declaration order,
// to `Interner::prefill`. Presumably `prefill` assigns symbols by position,
// which is why the indices have to match the entry positions — confirm
// against the full `prefill` body.
121 macro_rules! declare_keywords {(
122 $( ($index: expr, $konst: ident, $string: expr) )*
126 #[derive(Clone, Copy, PartialEq, Eq)]
// Accessors for the keyword's identifier and for that identifier's name.
131 #[inline] pub fn ident(self) -> ast::Ident { self.ident }
132 #[inline] pub fn name(self) -> ast::Name { self.ident.name }
// The constants use the keyword's CamelCase name rather than upper case.
135 #[allow(non_upper_case_globals)]
136 pub const $konst: Keyword = Keyword {
137 ident: ast::Ident::with_empty_ctxt(ast::Name($index))
// Seeds an interner with every keyword string, in declaration order.
144 Interner::prefill(&[$($string,)*])
// Keyword table passed to `declare_keywords!`; each entry is
// `(index, ConstName, "string")`.
// NOTE(review): only some entries are visible in this view (the indices jump,
// e.g. 5 -> 9 -> 24); the full table must satisfy the contiguity requirement
// described on the macro — confirm against the complete file.
149 // NB: leaving holes in the ident table is bad! a different ident will get
150 // interned with the id from the hole, but it will be between the min and max
151 // of the reserved words, and thus tagged as "reserved".
152 // After modifying this list adjust `is_strict_keyword`/`is_reserved_keyword`,
153 // this should be rarely necessary though if the keywords are kept in alphabetic order.
155 // Invalid identifier
158 // Strict keywords used in the language.
163 (5, Continue, "continue")
167 (9, Extern, "extern")
182 (24, Return, "return")
183 (25, SelfValue, "self")
184 (26, SelfType, "Self")
185 (27, Static, "static")
186 (28, Struct, "struct")
191 (33, Unsafe, "unsafe")
196 // Keywords reserved for future use.
197 (37, Abstract, "abstract")
198 (38, Alignof, "alignof")
199 (39, Become, "become")
203 (43, Offsetof, "offsetof")
204 (44, Override, "override")
208 (48, Sizeof, "sizeof")
209 (49, Typeof, "typeof")
210 (50, Unsized, "unsized")
211 (51, Virtual, "virtual")
214 // Weak keywords, have special meaning only in specific contexts.
215 (53, Default, "default")
216 (54, StaticLifetime, "'static")
220 // If an interner exists in TLS, return it. Otherwise, prepare a fresh one.
//
// Runs `f` with a mutable borrow of the current thread's interner and returns
// whatever `f` returns. The interner lives in a `thread_local!` `RefCell` and
// is initialised with `Interner::fresh()`. Because the borrow is taken with
// `borrow_mut`, calling `with_interner` again from inside `f` panics.
221 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
222 thread_local!(static INTERNER: RefCell<Interner> = {
223 RefCell::new(Interner::fresh())
225 INTERNER.with(|interner| f(&mut *interner.borrow_mut()))
228 /// Reset the ident interner to its initial state.
///
/// Replaces the current thread's interner with a new `Interner::fresh()`.
/// Symbols obtained before the reset keep their numbers, which are then
/// looked up in the new table.
229 pub fn reset_interner() {
230 with_interner(|interner| *interner = Interner::fresh());
233 /// Represents a string stored in the thread-local interner. Because the
234 /// interner lives for the life of the thread, this can be safely treated as an
235 /// immortal string, as long as it never crosses between threads.
///
/// The value is a handle (the `string` field, filled from `Interner::get`,
/// which returns an `Rc<str>`), so cloning it does not copy the text.
///
237 /// FIXME(pcwalton): You must be careful about what you do in the destructors
238 /// of objects stored in TLS, because they may run after the interner is
239 /// destroyed. In particular, they must not access string contents. This can
240 /// be fixed in the future by just leaking all strings until thread death
242 #[derive(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)]
243 pub struct InternedString {
247 impl InternedString {
/// Wraps a static string in an `InternedString`.
///
/// The text is placed in a new `Rc<str>` directly; this constructor does not
/// go through the interner.
248 pub fn new(string: &'static str) -> InternedString {
250 string: Rc::__from_str(string),
// Lets an `InternedString` be borrowed as `&str`.
255 impl ::std::ops::Deref for InternedString {
257 fn deref(&self) -> &str { &self.string }
// Debug-formats exactly like the underlying string.
260 impl fmt::Debug for InternedString {
261 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
262 fmt::Debug::fmt(&self.string, f)
// Displays exactly like the underlying string.
266 impl fmt::Display for InternedString {
267 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
268 fmt::Display::fmt(&self.string, f)
// `interned == "text"`: compares string contents with a `&str` on the right.
272 impl<'a> PartialEq<&'a str> for InternedString {
273 fn eq(&self, other: & &'a str) -> bool {
274 PartialEq::eq(&self.string[..], *other)
// `"text" == interned`: the mirror image, with the `&str` on the left.
278 impl<'a> PartialEq<InternedString> for &'a str {
279 fn eq(&self, other: &InternedString) -> bool {
280 PartialEq::eq(*self, &other.string[..])
// `interned == *s` for an unsized `str` on the right; compares contents.
284 impl PartialEq<str> for InternedString {
285 fn eq(&self, other: &str) -> bool {
286 PartialEq::eq(&self.string[..], other)
// `*s == interned` for an unsized `str` on the left; compares contents.
290 impl PartialEq<InternedString> for str {
291 fn eq(&self, other: &InternedString) -> bool {
292 PartialEq::eq(self, &other.string[..])
// Reads a string from the decoder, interns it in the thread-local interner
// and returns the interner's handle for it; a decoder error is propagated
// by `?`.
296 impl Decodable for InternedString {
297 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
298 Ok(Symbol::intern(&d.read_str()?).as_str())
// Serializes the string contents.
302 impl Encodable for InternedString {
303 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
304 s.emit_str(&self.string)
// Exercises a standalone `Interner` (not the thread-local one): `intern`
// hands out indices in order starting at 0 and returns the same index for a
// repeated string, while `gensym` always takes the next index, even for text
// that is already interned.
314 fn interner_tests() {
315 let mut i: Interner = Interner::new();
316 // first one is zero:
317 assert_eq!(i.intern("dog"), Name(0));
318 // re-use gets the same entry:
319 assert_eq!(i.intern ("dog"), Name(0));
320 // different string gets a different #:
321 assert_eq!(i.intern("cat"), Name(1));
322 assert_eq!(i.intern("cat"), Name(1));
323 // dog is still at zero
324 assert_eq!(i.intern("dog"), Name(0));
// first gensym takes the next free index after the two interned strings:
326 assert_eq!(i.gensym("zebra"), Name(2));
327 // gensym of same string gets new number :
328 assert_eq!(i.gensym("zebra"), Name(3));
329 // gensym of *existing* string gets new number:
330 assert_eq!(i.gensym("dog"), Name(4));