1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with `u32` tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
17 use serialize::{Decodable, Decoder, Encodable, Encoder};
18 use std::cell::RefCell;
19 use std::collections::HashMap;
22 #[derive(Copy, Clone, PartialEq, Eq, Hash)]
25 pub ctxt: SyntaxContext,
29 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
30 Ident { name: name, ctxt: SyntaxContext::empty() }
33 /// Maps a string to an identifier with an empty syntax context.
34 pub fn from_str(string: &str) -> Ident {
35 Ident::with_empty_ctxt(Symbol::intern(string))
38 pub fn modern(self) -> Ident {
39 Ident { name: self.name, ctxt: self.ctxt.modern() }
43 impl fmt::Debug for Ident {
44 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
45 write!(f, "{}{:?}", self.name, self.ctxt)
49 impl fmt::Display for Ident {
50 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51 fmt::Display::fmt(&self.name, f)
55 impl Encodable for Ident {
56 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
57 if self.ctxt.modern() == SyntaxContext::empty() {
58 s.emit_str(&self.name.as_str())
59 } else { // FIXME(jseyfried) intercrate hygiene
60 let mut string = "#".to_owned();
61 string.push_str(&self.name.as_str());
67 impl Decodable for Ident {
68 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
69 let string = d.read_str()?;
70 Ok(if !string.starts_with('#') {
71 Ident::from_str(&string)
72 } else { // FIXME(jseyfried) intercrate hygiene
73 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
78 /// A symbol is an interned or gensymed string.
///
/// The wrapped `u32` is a tag issued by `Interner`: interned strings are
/// numbered upward from 0 (the tag is the string's index in the interner's
/// string table), while gensyms are numbered downward from `u32::MAX`.
/// The derived equality, ordering and hashing therefore operate on the tag,
/// not on the string contents.
79 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
80 pub struct Symbol(u32);
82 // The interner is thread-local, so `Symbol` shouldn't move between threads.
// A `Symbol` is only an index into that per-thread interner, so on another
// thread it could name a different string or none at all.
83 impl !Send for Symbol { }
84 impl !Sync for Symbol { }
87 /// Maps a string to its interned representation.
88 pub fn intern(string: &str) -> Self {
89 with_interner(|interner| interner.intern(string))
92 pub fn interned(self) -> Self {
93 with_interner(|interner| interner.interned(self))
96 /// Gensyms a new `Symbol` for `string`, using the current interner.
97 pub fn gensym(string: &str) -> Self {
98 with_interner(|interner| interner.gensym(string))
101 pub fn gensymed(self) -> Self {
102 with_interner(|interner| interner.gensymed(self))
105 pub fn as_str(self) -> InternedString {
106 with_interner(|interner| unsafe {
108 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
113 pub fn as_u32(self) -> u32 {
118 impl<'a> From<&'a str> for Symbol {
119 fn from(string: &'a str) -> Symbol {
120 Symbol::intern(string)
124 impl fmt::Debug for Symbol {
125 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 write!(f, "{}({})", self, self.0)
130 impl fmt::Display for Symbol {
131 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
132 fmt::Display::fmt(&self.as_str(), f)
136 impl Encodable for Symbol {
137 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
138 s.emit_str(&self.as_str())
142 impl Decodable for Symbol {
143 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
144 Ok(Symbol::intern(&d.read_str()?))
148 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
149 fn eq(&self, other: &T) -> bool {
150 self.as_str() == other.deref()
155 pub struct Interner {
156 names: HashMap<Box<str>, Symbol>,
157 strings: Vec<Box<str>>,
158 gensyms: Vec<Symbol>,
162 pub fn new() -> Self {
166 fn prefill(init: &[&str]) -> Self {
167 let mut this = Interner::new();
168 for &string in init {
174 pub fn intern(&mut self, string: &str) -> Symbol {
175 if let Some(&name) = self.names.get(string) {
179 let name = Symbol(self.strings.len() as u32);
180 let string = string.to_string().into_boxed_str();
181 self.strings.push(string.clone());
182 self.names.insert(string, name);
186 pub fn interned(&self, symbol: Symbol) -> Symbol {
187 if (symbol.0 as usize) < self.strings.len() {
190 self.interned(self.gensyms[(!0 - symbol.0) as usize])
194 fn gensym(&mut self, string: &str) -> Symbol {
195 let symbol = self.intern(string);
196 self.gensymed(symbol)
199 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
200 self.gensyms.push(symbol);
201 Symbol(!0 - self.gensyms.len() as u32 + 1)
204 pub fn get(&self, symbol: Symbol) -> &str {
205 match self.strings.get(symbol.0 as usize) {
206 Some(ref string) => string,
207 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
212 // In this macro, there is the requirement that the name (the number) must be monotonically
213 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
214 // except starting from the next number instead of zero.
215 macro_rules! declare_keywords {(
216 $( ($index: expr, $konst: ident, $string: expr) )*
219 use super::{Symbol, Ident};
220 #[derive(Clone, Copy, PartialEq, Eq)]
/// Returns the `Ident` stored in this keyword.
225 #[inline] pub fn ident(self) -> Ident { self.ident }
/// Returns the `Symbol` (the `name` field) of this keyword's identifier.
226 #[inline] pub fn name(self) -> Symbol { self.ident.name }
229 #[allow(non_upper_case_globals)]
230 pub const $konst: Keyword = Keyword {
231 ident: Ident::with_empty_ctxt(super::Symbol($index))
238 Interner::prefill(&[$($string,)*])
243 // NB: leaving holes in the ident table is bad! a different ident will get
244 // interned with the id from the hole, but it will be between the min and max
245 // of the reserved words, and thus tagged as "reserved".
246 // After modifying this list, adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`;
247 // this should rarely be necessary, though, if the keywords are kept in alphabetical order.
249 // Special reserved identifiers used internally for elided lifetimes,
250 // unnamed method parameters, crate root module, error recovery etc.
252 (1, CrateRoot, "{{root}}")
253 (2, DollarCrate, "$crate")
255 // Keywords used in the language.
260 (7, Continue, "continue")
264 (11, Extern, "extern")
279 (26, Return, "return")
280 (27, SelfValue, "self")
281 (28, SelfType, "Self")
282 (29, Static, "static")
283 (30, Struct, "struct")
288 (35, Unsafe, "unsafe")
293 // Keywords reserved for future use.
294 (39, Abstract, "abstract")
295 (40, Alignof, "alignof")
296 (41, Become, "become")
300 (45, Offsetof, "offsetof")
301 (46, Override, "override")
305 (50, Sizeof, "sizeof")
306 (51, Typeof, "typeof")
307 (52, Unsized, "unsized")
308 (53, Virtual, "virtual")
311 // Weak keywords, have special meaning only in specific contexts.
312 (55, Default, "default")
313 (56, StaticLifetime, "'static")
318 // Runs `f` with mutable access to this thread's interner; if none exists in TLS yet, a fresh one is prepared first.
319 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
320 thread_local!(static INTERNER: RefCell<Interner> = {
321 RefCell::new(Interner::fresh())
323 INTERNER.with(|interner| f(&mut *interner.borrow_mut()))
326 /// Represents a string stored in the thread-local interner. Because the
327 /// interner lives for the life of the thread, this can be safely treated as an
328 /// immortal string, as long as it never crosses between threads.
330 /// FIXME(pcwalton): You must be careful about what you do in the destructors
331 /// of objects stored in TLS, because they may run after the interner is
332 /// destroyed. In particular, they must not access string contents. This can
333 /// be fixed in the future by just leaking all strings until thread death
335 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
336 pub struct InternedString {
337 string: &'static str,
340 impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> {
341 fn as_ref(&self) -> &U {
346 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString {
347 fn eq(&self, other: &T) -> bool {
348 self.string == other.deref()
352 impl ::std::cmp::PartialEq<InternedString> for str {
353 fn eq(&self, other: &InternedString) -> bool {
358 impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str {
359 fn eq(&self, other: &InternedString) -> bool {
360 *self == other.string
364 impl ::std::cmp::PartialEq<InternedString> for String {
365 fn eq(&self, other: &InternedString) -> bool {
370 impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String {
371 fn eq(&self, other: &InternedString) -> bool {
372 *self == other.string
// An `InternedString` points into the thread-local interner's storage (it is
// built by `Symbol::as_str`), so it must not be sent to another thread.
376 impl !Send for InternedString { }
378 impl ::std::ops::Deref for InternedString {
/// Borrows the interned string's contents as a `&str`.
380 fn deref(&self) -> &str { self.string }
383 impl fmt::Debug for InternedString {
384 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
385 fmt::Debug::fmt(self.string, f)
389 impl fmt::Display for InternedString {
390 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
391 fmt::Display::fmt(self.string, f)
395 impl Decodable for InternedString {
396 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
397 Ok(Symbol::intern(&d.read_str()?).as_str())
401 impl Encodable for InternedString {
402 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
403 s.emit_str(self.string)
412 fn interner_tests() {
413 let mut i: Interner = Interner::new();
414 // first one is zero:
415 assert_eq!(i.intern("dog"), Symbol(0));
416 // re-use gets the same entry:
417 assert_eq!(i.intern ("dog"), Symbol(0));
418 // different string gets a different #:
419 assert_eq!(i.intern("cat"), Symbol(1));
420 assert_eq!(i.intern("cat"), Symbol(1));
421 // dog is still at zero
422 assert_eq!(i.intern("dog"), Symbol(0));
423 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
424 // gensym of same string gets new number :
425 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
426 // gensym of *existing* string gets new number:
427 assert_eq!(i.gensym("dog"), Symbol(4294967293));