1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
17 use serialize::{Decodable, Decoder, Encodable, Encoder};
18 use std::cell::RefCell;
19 use std::collections::HashMap;
22 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
/// Syntax context attached to the identifier; `with_empty_ctxt` fills it with
/// `SyntaxContext::empty()`.
25 pub ctxt: SyntaxContext,
/// Builds an `Ident` from `name` paired with `SyntaxContext::empty()`.
29 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
30 Ident { name: name, ctxt: SyntaxContext::empty() }
33 /// Maps a string to an identifier with an empty syntax context.
/// The string is interned with `Symbol::intern` and then wrapped by `with_empty_ctxt`.
34 pub fn from_str(string: &str) -> Ident {
35 Ident::with_empty_ctxt(Symbol::intern(string))
/// Returns an identifier with the same name whose context is `self.ctxt.modern()`.
38 pub fn modern(self) -> Ident {
39 Ident { name: self.name, ctxt: self.ctxt.modern() }
// Debug output is the name immediately followed by the `Debug` form of the syntax context.
43 impl fmt::Debug for Ident {
44 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 write!(f, "{}{:?}", self.name, self.ctxt)
// Display output is the name only; the syntax context is not printed.
49 impl fmt::Display for Ident {
50 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51 fmt::Display::fmt(&self.name, f)
// Serializes an `Ident` through the supplied `Encoder`.
// NOTE(review): the body of `encode` is not shown here; the `Decodable` impl rebuilds the
// identifier with an empty context, so presumably only the name is written — confirm.
55 impl Encodable for Ident {
56 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
// Decodes a `Symbol` and wraps it with an empty syntax context; no context is read back.
61 impl Decodable for Ident {
62 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
63 Ok(Ident::with_empty_ctxt(Symbol::decode(d)?))
67 /// A symbol is an interned or gensymed string.
///
/// It is a `u32` newtype; `Interner::get` uses the wrapped value as an index into the
/// interner's string table.
68 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
69 pub struct Symbol(u32);
71 // The interner is thread-local, so `Symbol` shouldn't move between threads.
72 impl !Send for Symbol { }
75 /// Maps a string to its interned representation.
/// The lookup goes through the thread-local interner reached via `with_interner`.
76 pub fn intern(string: &str) -> Self {
77 with_interner(|interner| interner.intern(string))
80 /// Generates a fresh `Symbol` for `string`, using the current interner.
/// The result is distinct from the symbol obtained by interning the same text.
81 pub fn gensym(string: &str) -> Self {
82 with_interner(|interner| interner.gensym(string))
/// Returns the string for this symbol as an `InternedString`.
85 pub fn as_str(self) -> InternedString {
86 with_interner(|interner| unsafe {
// The transmute changes only the lifetime: `interner.get` returns a borrow tied to the
// interner, while the `string` field of `InternedString` is `&'static str`.
// NOTE(review): this relies on the interner's strings outliving every `InternedString`
// handed out on this thread — confirm.
88 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
/// Returns this symbol as a `u32`.
// NOTE(review): the body is not shown here; presumably it returns the wrapped index `self.0`.
93 pub fn as_u32(self) -> u32 {
// Debug output has the shape `text(index)`: the `Display` form followed by the raw `u32`.
98 impl fmt::Debug for Symbol {
99 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100 write!(f, "{}({})", self, self.0)
// Display output is the symbol's string, looked up through `as_str`.
104 impl fmt::Display for Symbol {
105 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
106 fmt::Display::fmt(&self.as_str(), f)
// A symbol is encoded as its string contents, not as its numeric index.
110 impl Encodable for Symbol {
111 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
112 s.emit_str(&self.as_str())
// Decoding reads a string and interns it, yielding the symbol for that text in the
// current interner.
116 impl Decodable for Symbol {
117 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
118 Ok(Symbol::intern(&d.read_str()?))
// Lets a `Symbol` be compared with anything that dereferences to `str`; the comparison is
// on string contents, obtained through `as_str`.
122 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
123 fn eq(&self, other: &T) -> bool {
124 self.as_str() == other.deref()
/// String table backing `Symbol`: maps text to symbols and symbol indices back to text.
129 pub struct Interner {
/// Text-to-symbol map; `intern` inserts here, `gensym` deliberately does not.
130 names: HashMap<Box<str>, Symbol>,
/// Symbol-to-text table, indexed by the `u32` inside a `Symbol`; both `intern` and
/// `gensym` push onto it.
131 strings: Vec<Box<str>>,
/// Creates an interner.
// NOTE(review): the body is not shown here; `interner_tests` expects the first interned
// string to receive `Symbol(0)`, so presumably both tables start empty.
135 pub fn new() -> Self {
/// Builds an interner from `Interner::new()` and walks `init` in order.
// NOTE(review): the loop body is not shown here; presumably each string is interned so that
// `init[i]` ends up as `Symbol(i)` — confirm.
139 fn prefill(init: &[&str]) -> Self {
140 let mut this = Interner::new();
141 for &string in init {
/// Returns the symbol for `string`, consulting `names` first so that a string interned
/// earlier keeps its symbol.
147 pub fn intern(&mut self, string: &str) -> Symbol {
// Look the text up among the already-interned names.
148 if let Some(&name) = self.names.get(string) {
// Not seen before: the new symbol is the next free index in `strings`.
// NOTE(review): `len() as u32` truncates silently if the table ever exceeds `u32::MAX`.
152 let name = Symbol(self.strings.len() as u32);
// Two owned copies are stored: one in `strings` (index -> text) and one as the key
// in `names` (text -> symbol).
153 let string = string.to_string().into_boxed_str();
154 self.strings.push(string.clone());
155 self.names.insert(string, name);
/// Allocates a fresh symbol for `string` at the next free index of `strings`.
/// The text is not registered in `names`, so `intern` never hands this symbol back.
159 fn gensym(&mut self, string: &str) -> Symbol {
160 let gensym = Symbol(self.strings.len() as u32);
161 // leave out of `names` to avoid colliding
162 self.strings.push(string.to_string().into_boxed_str());
/// Returns the string stored for `name`.
/// `strings` is indexed directly, so an index outside the table panics.
166 pub fn get(&self, name: Symbol) -> &str {
167 &self.strings[name.0 as usize]
171 // In this macro, there is the requirement that the name (the number) must be monotonically
172 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
173 // except starting from the next number instead of zero.
// The macro takes a list of `(index, constant name, string)` entries. It emits one `Keyword`
// constant per entry and passes the strings, in declaration order, to `Interner::prefill`.
174 macro_rules! declare_keywords {(
175 $( ($index: expr, $konst: ident, $string: expr) )*
178 use super::{Symbol, Ident};
179 #[derive(Clone, Copy, PartialEq, Eq)]
/// Returns the identifier stored in this keyword.
184 #[inline] pub fn ident(self) -> Ident { self.ident }
/// Returns the symbol of this keyword's identifier.
185 #[inline] pub fn name(self) -> Symbol { self.ident.name }
// Each constant's ident wraps `Symbol($index)` with an empty syntax context.
188 #[allow(non_upper_case_globals)]
189 pub const $konst: Keyword = Keyword {
190 ident: Ident::with_empty_ctxt(super::Symbol($index))
// Strings are handed to `prefill` in declaration order; per the requirement stated above,
// each `$index` is expected to equal the entry's position in the list.
197 Interner::prefill(&[$($string,)*])
// Entries have the form `(index, constant name, string)`, matching the pattern accepted by
// `declare_keywords!`.
202 // NB: leaving holes in the ident table is bad! a different ident will get
203 // interned with the id from the hole, but it will be between the min and max
204 // of the reserved words, and thus tagged as "reserved".
205 // After modifying this list adjust `is_strict_keyword`/`is_reserved_keyword`,
206 // this should be rarely necessary though if the keywords are kept in alphabetic order.
208 // Invalid identifier
211 // Strict keywords used in the language.
216 (5, Continue, "continue")
220 (9, Extern, "extern")
235 (24, Return, "return")
236 (25, SelfValue, "self")
237 (26, SelfType, "Self")
238 (27, Static, "static")
239 (28, Struct, "struct")
244 (33, Unsafe, "unsafe")
249 // Keywords reserved for future use.
250 (37, Abstract, "abstract")
251 (38, Alignof, "alignof")
252 (39, Become, "become")
256 (43, Offsetof, "offsetof")
257 (44, Override, "override")
261 (48, Sizeof, "sizeof")
262 (49, Typeof, "typeof")
263 (50, Unsized, "unsized")
264 (51, Virtual, "virtual")
267 // Weak keywords, have special meaning only in specific contexts.
268 (53, Default, "default")
269 (54, StaticLifetime, "'static")
273 // A virtual keyword that resolves to the crate root when used in a lexical scope.
274 (57, CrateRoot, "{{root}}")
277 // If an interner exists in TLS, return it. Otherwise, prepare a fresh one.
// Runs `f` with mutable access to this thread's interner and returns whatever `f` returns.
// The interner sits in a `RefCell` that stays mutably borrowed while `f` runs, so calling
// `with_interner` again from inside `f` would panic on the nested `borrow_mut`.
278 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
// The thread-local value is initialized from `Interner::fresh()`.
279 thread_local!(static INTERNER: RefCell<Interner> = {
280 RefCell::new(Interner::fresh())
282 INTERNER.with(|interner| f(&mut *interner.borrow_mut()))
285 /// Represents a string stored in the thread-local interner. Because the
286 /// interner lives for the life of the thread, this can be safely treated as an
287 /// immortal string, as long as it never crosses between threads.
289 /// FIXME(pcwalton): You must be careful about what you do in the destructors
290 /// of objects stored in TLS, because they may run after the interner is
291 /// destroyed. In particular, they must not access string contents. This can
292 /// be fixed in the future by just leaking all strings until thread death
// `PartialEq` is not derived; it is implemented by hand for any `T: Deref<Target = str>`.
294 #[derive(Clone, Hash, PartialOrd, Eq, Ord)]
295 pub struct InternedString {
/// Slice borrowed from the interner's string table; the `'static` lifetime is asserted
/// by the transmute in `Symbol::as_str`.
296 string: &'static str,
// Provides `AsRef<U>` for every `U` that `str` itself can be viewed as.
// NOTE(review): the body is not shown here; presumably it forwards to `self.string.as_ref()`.
299 impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> {
300 fn as_ref(&self) -> &U {
// Equality against anything that dereferences to `str`, compared by string contents.
// `InternedString` itself implements `Deref<Target = str>`, so this also covers comparing
// two `InternedString`s.
305 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString {
306 fn eq(&self, other: &T) -> bool {
307 self.string == other.deref()
// Allows `str == InternedString`.
// NOTE(review): the body is not shown here; presumably it compares `self` with `other.string`.
311 impl ::std::cmp::PartialEq<InternedString> for str {
312 fn eq(&self, other: &InternedString) -> bool {
// Allows `&str == InternedString`, comparing the referenced text with the interned text.
317 impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str {
318 fn eq(&self, other: &InternedString) -> bool {
319 *self == other.string
// Allows `String == InternedString`.
// NOTE(review): the body is not shown here; presumably it compares `self` with `other.string`.
323 impl ::std::cmp::PartialEq<InternedString> for String {
324 fn eq(&self, other: &InternedString) -> bool {
// Allows `&String == InternedString`, comparing the referenced text with the interned text.
329 impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String {
330 fn eq(&self, other: &InternedString) -> bool {
331 *self == other.string
// An `InternedString` points into the thread-local interner, so it must not be sent to
// another thread.
335 impl !Send for InternedString { }
// Dereferencing yields the underlying string slice.
337 impl ::std::ops::Deref for InternedString {
/// Returns the interned text.
339 fn deref(&self) -> &str { self.string }
// Debug formatting is delegated to `str`'s `Debug` impl.
342 impl fmt::Debug for InternedString {
343 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
344 fmt::Debug::fmt(self.string, f)
// Display formatting is delegated to `str`'s `Display` impl.
348 impl fmt::Display for InternedString {
349 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
350 fmt::Display::fmt(self.string, f)
// Decoding reads a string, interns it in the current interner, and returns the interned
// string obtained through `Symbol::as_str`.
354 impl Decodable for InternedString {
355 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
356 Ok(Symbol::intern(&d.read_str()?).as_str())
// Encoding writes the string contents.
360 impl Encodable for InternedString {
361 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
362 s.emit_str(self.string)
// Checks index assignment on a fresh `Interner`: `intern` reuses the index of a string it
// has seen, while `gensym` always takes the next free index.
371 fn interner_tests() {
372 let mut i: Interner = Interner::new();
373 // first one is zero:
374 assert_eq!(i.intern("dog"), Symbol(0));
375 // re-use gets the same entry:
376 assert_eq!(i.intern ("dog"), Symbol(0));
377 // different string gets a different #:
378 assert_eq!(i.intern("cat"), Symbol(1));
379 assert_eq!(i.intern("cat"), Symbol(1));
380 // dog is still at zero
381 assert_eq!(i.intern("dog"), Symbol(0));
// gensym takes the next free index:
383 assert_eq!(i.gensym("zebra"), Symbol(2));
384 // gensym of the same string gets a new number:
385 assert_eq!(i.gensym("zebra"), Symbol(3));
386 // gensym of *existing* string gets new number:
387 assert_eq!(i.gensym("dog"), Symbol(4));