1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with usize tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 use rustc_data_structures::fx::FxHashMap;
19 use arena::DroplessArena;
20 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
24 use std::hash::{Hash, Hasher};
26 #[derive(Copy, Clone, Eq)]
34 pub const fn new(name: Symbol, span: Span) -> Ident {
38 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
39 Ident::new(name, DUMMY_SP)
42 /// Maps an interned string to an identifier with an empty syntax context.
43 pub fn from_interned_str(string: InternedString) -> Ident {
44 Ident::with_empty_ctxt(string.as_symbol())
47 /// Maps a string to an identifier with an empty syntax context.
48 pub fn from_str(string: &str) -> Ident {
49 Ident::with_empty_ctxt(Symbol::intern(string))
52 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
53 pub fn with_span_pos(self, span: Span) -> Ident {
54 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
/// Returns an identifier whose name is this one's with all leading
/// single-quote characters removed (`trim_left_matches` strips every leading
/// match, not only the first). The trimmed name is interned and paired with
/// the unchanged span.
57 pub fn without_first_quote(self) -> Ident {
58 Ident::new(Symbol::intern(self.name.as_str().trim_left_matches('\'')), self.span)
61 pub fn modern(self) -> Ident {
62 Ident::new(self.name, self.span.modern())
65 pub fn gensym(self) -> Ident {
66 Ident::new(self.name.gensymed(), self.span)
// Two identifiers are equal when their names match and their spans report the
// same `ctxt()`; nothing else about the span takes part in the comparison.
70 impl PartialEq for Ident {
71 fn eq(&self, rhs: &Self) -> bool {
72 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
// Feeds the hasher the name and the span's `ctxt()`, i.e. the same two values
// that `eq` on `Ident` compares, so equal identifiers hash equally.
77 fn hash<H: Hasher>(&self, state: &mut H) {
78 self.name.hash(state);
79 self.span.ctxt().hash(state);
83 impl fmt::Debug for Ident {
84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85 write!(f, "{}{:?}", self.name, self.span.ctxt())
89 impl fmt::Display for Ident {
90 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
91 fmt::Display::fmt(&self.name, f)
// Serialization convention: when the modernized syntax context is empty the
// bare name is emitted; otherwise a string made of `#` followed by the name is
// built. `Decodable for Ident` recognises the leading `#` and produces a gensym.
95 impl Encodable for Ident {
96 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
97 if self.span.ctxt().modern() == SyntaxContext::empty() {
98 s.emit_str(&self.name.as_str())
99 } else { // FIXME(jseyfried) intercrate hygiene
100 let mut string = "#".to_owned();
101 string.push_str(&self.name.as_str());
// Reads one string. Without a leading `#` it is interned as an ordinary
// identifier; with one, the remainder (after the first byte) becomes a fresh
// gensym. Either way the resulting identifier is built with an empty context.
107 impl Decodable for Ident {
108 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
109 let string = d.read_str()?;
110 Ok(if !string.starts_with('#') {
111 Ident::from_str(&string)
112 } else { // FIXME(jseyfried) intercrate hygiene
113 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
118 /// A symbol is an interned or gensymed string.
///
/// The wrapped `u32` is an index handed out by the `Interner`: interned
/// strings receive the current length of its string table (counting up from
/// zero), while gensyms are numbered downwards starting at `!0` (`u32::MAX`).
119 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
120 pub struct Symbol(u32);
122 // The interner is pointed to by a thread local value which is only set on the main thread
123 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
124 // to avoid panics and other errors, even though it would be memory safe to do so.
125 #[cfg(not(parallel_queries))]
126 impl !Send for Symbol { }
127 #[cfg(not(parallel_queries))]
128 impl !Sync for Symbol { }
131 /// Maps a string to its interned representation.
132 pub fn intern(string: &str) -> Self {
133 with_interner(|interner| interner.intern(string))
136 pub fn interned(self) -> Self {
137 with_interner(|interner| interner.interned(self))
140 /// Gensyms a new symbol for `string`, using the current interner.
141 pub fn gensym(string: &str) -> Self {
142 with_interner(|interner| interner.gensym(string))
145 pub fn gensymed(self) -> Self {
146 with_interner(|interner| interner.gensymed(self))
/// Returns the string for this symbol as a `LocalInternedString`.
149 pub fn as_str(self) -> LocalInternedString {
// The transmute names the same type on both sides, so the only thing it can
// change is the lifetime of the reference returned by `interner.get`; the
// result is stored in the `&'static str` field of `LocalInternedString`.
// NOTE(review): soundness presumably relies on the interner outliving every
// `LocalInternedString` -- confirm against the FIXME on that type.
150 with_interner(|interner| unsafe {
151 LocalInternedString {
152 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
157 pub fn as_interned_str(self) -> InternedString {
158 with_interner(|interner| InternedString {
159 symbol: interner.interned(self)
163 pub fn as_u32(self) -> u32 {
168 impl fmt::Debug for Symbol {
169 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
170 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
172 write!(f, "{}({})", self, self.0)
174 write!(f, "{}", self)
179 impl fmt::Display for Symbol {
180 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
181 fmt::Display::fmt(&self.as_str(), f)
185 impl Encodable for Symbol {
186 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
187 s.emit_str(&self.as_str())
191 impl Decodable for Symbol {
192 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
193 Ok(Symbol::intern(&d.read_str()?))
197 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
198 fn eq(&self, other: &T) -> bool {
199 self.as_str() == other.deref()
203 // The &'static strs in this type actually point into the arena
204 pub struct Interner {
// Backing storage that `intern` copies string bytes into.
205 arena: DroplessArena,
// String -> symbol lookup; `intern` consults it before allocating.
206 names: FxHashMap<&'static str, Symbol>,
// Symbol index -> string; a newly interned string takes the current length
// of this vector as its symbol value.
207 strings: Vec<&'static str>,
// One entry per gensym, holding the symbol it was created from. Gensym
// values map to positions here via `!0 - symbol.0`.
208 gensyms: Vec<Symbol>,
212 pub fn new() -> Self {
214 arena: DroplessArena::new(),
215 names: Default::default(),
216 strings: Default::default(),
217 gensyms: Default::default(),
221 fn prefill(init: &[&str]) -> Self {
222 let mut this = Interner::new();
223 for &string in init {
225 // We can't allocate empty strings in the arena, so handle this here
226 let name = Symbol(this.strings.len() as u32);
227 this.names.insert("", name);
228 this.strings.push("");
236 pub fn intern(&mut self, string: &str) -> Symbol {
237 if let Some(&name) = self.names.get(string) {
241 let name = Symbol(self.strings.len() as u32);
243 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
244 let string: &str = unsafe {
245 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
247 // It is safe to extend the arena allocation to 'static because we only access
248 // these while the arena is still alive
249 let string: &'static str = unsafe {
250 &*(string as *const str)
252 self.strings.push(string);
253 self.names.insert(string, name);
257 pub fn interned(&self, symbol: Symbol) -> Symbol {
258 if (symbol.0 as usize) < self.strings.len() {
261 self.interned(self.gensyms[(!0 - symbol.0) as usize])
265 fn gensym(&mut self, string: &str) -> Symbol {
266 let symbol = self.intern(string);
267 self.gensymed(symbol)
/// Records `symbol` as the target of a fresh gensym and returns the new,
/// distinct symbol that stands for it.
270 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
271 self.gensyms.push(symbol);
// The length is read after the push, so the first gensym is `!0` (u32::MAX),
// the second `!0 - 1`, and so on. `get` inverts this with `!0 - symbol.0`.
272 Symbol(!0 - self.gensyms.len() as u32 + 1)
/// Reports whether `symbol` is a gensym, i.e. its value is not a valid index
/// into `strings`.
// NOTE(review): the body only reads `self`; `&mut self` looks stricter than needed.
275 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
276 symbol.0 as usize >= self.strings.len()
/// Returns the string for `symbol`, following a gensym back to the symbol it
/// was created from (recursively, if that is itself a gensym).
279 pub fn get(&self, symbol: Symbol) -> &str {
280 match self.strings.get(symbol.0 as usize) {
281 Some(string) => string,
// Not an index into `strings`, so treat the value as a gensym:
// `!0 - symbol.0` is its position in `gensyms`. The indexing panics if
// that position is out of range.
282 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
287 // In this macro, there is the requirement that the name (the number) must be monotonically
288 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
289 // except starting from the next number instead of zero.
290 macro_rules! declare_keywords {(
291 $( ($index: expr, $konst: ident, $string: expr) )*
294 use super::{Symbol, Ident};
295 #[derive(Clone, Copy, PartialEq, Eq)]
300 #[inline] pub fn ident(self) -> Ident { self.ident }
301 #[inline] pub fn name(self) -> Symbol { self.ident.name }
304 #[allow(non_upper_case_globals)]
305 pub const $konst: Keyword = Keyword {
306 ident: Ident::with_empty_ctxt(super::Symbol($index))
312 pub fn fresh() -> Self {
313 Interner::prefill(&[$($string,)*])
318 // NB: leaving holes in the ident table is bad! a different ident will get
319 // interned with the id from the hole, but it will be between the min and max
320 // of the reserved words, and thus tagged as "reserved".
321 // After modifying this list adjust `is_special_ident`, `is_used_keyword`/`is_unused_keyword`,
322 // this should be rarely necessary though if the keywords are kept in alphabetic order.
324 // Special reserved identifiers used internally for elided lifetimes,
325 // unnamed method parameters, crate root module, error recovery etc.
327 (1, CrateRoot, "{{root}}")
328 (2, DollarCrate, "$crate")
331 // Keywords used in the language.
336 (8, Continue, "continue")
340 (12, Extern, "extern")
355 (27, Return, "return")
356 (28, SelfValue, "self")
357 (29, SelfType, "Self")
358 (30, Static, "static")
359 (31, Struct, "struct")
364 (36, Unsafe, "unsafe")
369 // Keywords reserved for future use.
370 (40, Abstract, "abstract")
371 (41, Alignof, "alignof")
372 (42, Become, "become")
376 (46, Offsetof, "offsetof")
377 (47, Override, "override")
380 (50, Sizeof, "sizeof")
381 (51, Typeof, "typeof")
382 (52, Unsized, "unsized")
383 (53, Virtual, "virtual")
386 // Special lifetime names
387 (55, UnderscoreLifetime, "'_")
388 (56, StaticLifetime, "'static")
390 // Weak keywords, have special meaning only in specific contexts.
393 (59, Default, "default")
398 // Runs `f` with mutable access to the interner held in `GLOBALS`
// (`globals.symbol_interner`), obtained through its `lock()` method, and
// returns whatever `f` returns.
// NOTE(review): nothing here creates an interner on demand; it is taken from
// `GLOBALS` as is.
400 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
401 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
404 /// Represents a string stored in the interner. Because the interner outlives any thread
405 /// which uses this type, we can safely treat `string` which points to interner data,
406 /// as an immortal string, as long as this type never crosses between threads.
407 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
408 // by creating a new thread right after constructing the interner
409 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
410 pub struct LocalInternedString {
411 string: &'static str,
414 impl LocalInternedString {
415 pub fn as_interned_str(self) -> InternedString {
417 symbol: Symbol::intern(self.string)
422 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
424 str: ::std::convert::AsRef<U>
426 fn as_ref(&self) -> &U {
431 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
432 fn eq(&self, other: &T) -> bool {
433 self.string == other.deref()
437 impl ::std::cmp::PartialEq<LocalInternedString> for str {
438 fn eq(&self, other: &LocalInternedString) -> bool {
443 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
444 fn eq(&self, other: &LocalInternedString) -> bool {
445 *self == other.string
449 impl ::std::cmp::PartialEq<LocalInternedString> for String {
450 fn eq(&self, other: &LocalInternedString) -> bool {
455 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
456 fn eq(&self, other: &LocalInternedString) -> bool {
457 *self == other.string
461 impl !Send for LocalInternedString {}
462 impl !Sync for LocalInternedString {}
464 impl ::std::ops::Deref for LocalInternedString {
466 fn deref(&self) -> &str { self.string }
469 impl fmt::Debug for LocalInternedString {
470 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
471 fmt::Debug::fmt(self.string, f)
475 impl fmt::Display for LocalInternedString {
476 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
477 fmt::Display::fmt(self.string, f)
481 impl Decodable for LocalInternedString {
482 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
483 Ok(Symbol::intern(&d.read_str()?).as_str())
487 impl Encodable for LocalInternedString {
488 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
489 s.emit_str(self.string)
493 /// Represents a string stored in the string interner
494 #[derive(Clone, Copy, Eq)]
495 pub struct InternedString {
499 impl InternedString {
500 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
// Only a raw pointer to the string leaves the `with_interner` closure, so no
// borrow of the interner escapes that call.
501 let str = with_interner(|interner| {
502 interner.get(self.symbol) as *const str
504 // This is safe because the interner keeps string alive until it is dropped.
505 // We can access it because we know the interner is still alive since we use a
506 // scoped thread local to access it, and it was alive at the beginning of this scope
510 pub fn as_symbol(self) -> Symbol {
514 pub fn as_str(self) -> LocalInternedString {
519 impl Hash for InternedString {
520 fn hash<H: Hasher>(&self, state: &mut H) {
521 self.with(|str| str.hash(state))
// Ordering is by string contents. Identical symbols are reported as `Equal`
// up front, which skips both interner lookups.
525 impl PartialOrd<InternedString> for InternedString {
526 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
527 if self.symbol == other.symbol {
528 return Some(Ordering::Equal);
530 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
// Total order by string contents, with the same shortcut as `partial_cmp`:
// identical symbols compare `Equal` without looking up either string.
534 impl Ord for InternedString {
535 fn cmp(&self, other: &InternedString) -> Ordering {
536 if self.symbol == other.symbol {
537 return Ordering::Equal;
539 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
543 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
544 fn eq(&self, other: &T) -> bool {
545 self.with(|string| string == other.deref())
549 impl PartialEq<InternedString> for InternedString {
550 fn eq(&self, other: &InternedString) -> bool {
551 self.symbol == other.symbol
555 impl PartialEq<InternedString> for str {
556 fn eq(&self, other: &InternedString) -> bool {
557 other.with(|string| self == string)
561 impl<'a> PartialEq<InternedString> for &'a str {
562 fn eq(&self, other: &InternedString) -> bool {
563 other.with(|string| *self == string)
567 impl PartialEq<InternedString> for String {
568 fn eq(&self, other: &InternedString) -> bool {
569 other.with(|string| self == string)
573 impl<'a> PartialEq<InternedString> for &'a String {
574 fn eq(&self, other: &InternedString) -> bool {
575 other.with(|string| *self == string)
579 impl ::std::convert::From<InternedString> for String {
580 fn from(val: InternedString) -> String {
581 val.as_symbol().to_string()
585 impl fmt::Debug for InternedString {
586 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
587 self.with(|str| fmt::Debug::fmt(&str, f))
591 impl fmt::Display for InternedString {
592 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
593 self.with(|str| fmt::Display::fmt(&str, f))
597 impl Decodable for InternedString {
598 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
599 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
603 impl Encodable for InternedString {
604 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
605 self.with(|string| s.emit_str(string))
// Exercises a fresh `Interner`: interned strings are numbered upwards from
// zero and are stable on re-interning, while every gensym call yields a new
// value numbered downwards from u32::MAX.
615 fn interner_tests() {
616 let mut i: Interner = Interner::new();
617 // first one is zero:
618 assert_eq!(i.intern("dog"), Symbol(0));
619 // re-use gets the same entry:
620 assert_eq!(i.intern("dog"), Symbol(0));
621 // different string gets a different #:
622 assert_eq!(i.intern("cat"), Symbol(1));
623 assert_eq!(i.intern("cat"), Symbol(1));
624 // dog is still at zero
625 assert_eq!(i.intern("dog"), Symbol(0));
// 4294967295 is u32::MAX, the value given to the first gensym:
626 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
627 // gensym of same string gets new number :
628 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
629 // gensym of *existing* string gets new number:
630 assert_eq!(i.gensym("dog"), Symbol(4294967293));
634 fn without_first_quote_test() {
635 GLOBALS.set(&Globals::new(), || {
636 let i = Ident::from_str("'break");
637 assert_eq!(i.without_first_quote().name, keywords::Break.name());