1 //! An "interner" is a data structure that associates values with usize tags and
2 //! allows bidirectional lookup; i.e., given a value, one can easily find the
3 //! type, and vice versa.
5 use arena::DroplessArena;
6 use rustc_data_structures::fx::FxHashMap;
7 use rustc_data_structures::indexed_vec::Idx;
8 use serialize::{Decodable, Decoder, Encodable, Encoder};
12 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
13 use std::hash::{Hash, Hasher};
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 #[derive(Copy, Clone, Eq)]
26 pub const fn new(name: Symbol, span: Span) -> Ident {
31 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
32 Ident::new(name, DUMMY_SP)
35 /// Maps an interned string to an identifier with an empty syntax context.
36 pub fn from_interned_str(string: InternedString) -> Ident {
37 Ident::with_empty_ctxt(string.as_symbol())
40 /// Maps a string to an identifier with an empty syntax context.
41 pub fn from_str(string: &str) -> Ident {
42 Ident::with_empty_ctxt(Symbol::intern(string))
45 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
46 pub fn with_span_pos(self, span: Span) -> Ident {
47 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
50 pub fn without_first_quote(self) -> Ident {
51 Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span)
54 /// "Normalize" ident for use in comparisons using "item hygiene".
55 /// Identifiers with same string value become same if they came from the same "modern" macro
56 /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
57 /// different "modern" macros.
58 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
59 pub fn modern(self) -> Ident {
60 Ident::new(self.name, self.span.modern())
63 /// "Normalize" ident for use in comparisons using "local variable hygiene".
64 /// Identifiers with same string value become same if they came from the same non-transparent
65 /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
66 /// non-transparent macros.
67 /// Technically, this operation strips all transparent marks from ident's syntactic context.
68 pub fn modern_and_legacy(self) -> Ident {
69 Ident::new(self.name, self.span.modern_and_legacy())
72 pub fn gensym(self) -> Ident {
73 Ident::new(self.name.gensymed(), self.span)
76 pub fn gensym_if_underscore(self) -> Ident {
77 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
80 pub fn as_str(self) -> LocalInternedString {
84 pub fn as_interned_str(self) -> InternedString {
85 self.name.as_interned_str()
89 impl PartialEq for Ident {
90 fn eq(&self, rhs: &Self) -> bool {
91 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
96 fn hash<H: Hasher>(&self, state: &mut H) {
97 self.name.hash(state);
98 self.span.ctxt().hash(state);
102 impl fmt::Debug for Ident {
103 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
104 write!(f, "{}{:?}", self.name, self.span.ctxt())
108 impl fmt::Display for Ident {
109 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
110 fmt::Display::fmt(&self.name, f)
114 impl Encodable for Ident {
115 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
116 if self.span.ctxt().modern() == SyntaxContext::empty() {
117 s.emit_str(&self.as_str())
118 } else { // FIXME(jseyfried): intercrate hygiene
119 let mut string = "#".to_owned();
120 string.push_str(&self.as_str());
126 impl Decodable for Ident {
127 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
128 let string = d.read_str()?;
129 Ok(if !string.starts_with('#') {
130 Ident::from_str(&string)
131 } else { // FIXME(jseyfried): intercrate hygiene
132 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
137 /// A symbol is an interned or gensymed string. The use of newtype_index! means
138 /// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
139 /// the last 256 values for tagging purposes.
141 /// Note that Symbol cannot be a newtype_index! directly because it implements
142 /// fmt::Debug, Encodable, and Decodable in special ways.
143 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
144 pub struct Symbol(SymbolIndex);
147 pub struct SymbolIndex { .. }
150 // The interner is pointed to by a thread local value which is only set on the main thread
151 // with parallelization is disabled. So we don't allow `Symbol` to transfer between threads
152 // to avoid panics and other errors, even though it would be memory safe to do so.
153 #[cfg(not(parallel_compiler))]
154 impl !Send for Symbol { }
155 #[cfg(not(parallel_compiler))]
156 impl !Sync for Symbol { }
159 const fn new(n: u32) -> Self {
160 Symbol(SymbolIndex::from_u32_const(n))
163 /// Maps a string to its interned representation.
164 pub fn intern(string: &str) -> Self {
165 with_interner(|interner| interner.intern(string))
168 pub fn interned(self) -> Self {
169 with_interner(|interner| interner.interned(self))
172 /// Gensyms a new usize, using the current interner.
173 pub fn gensym(string: &str) -> Self {
174 with_interner(|interner| interner.gensym(string))
177 pub fn gensymed(self) -> Self {
178 with_interner(|interner| interner.gensymed(self))
181 pub fn as_str(self) -> LocalInternedString {
182 with_interner(|interner| unsafe {
183 LocalInternedString {
184 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
189 pub fn as_interned_str(self) -> InternedString {
190 with_interner(|interner| InternedString {
191 symbol: interner.interned(self)
195 pub fn as_u32(self) -> u32 {
200 impl fmt::Debug for Symbol {
201 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
202 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
204 write!(f, "{}({:?})", self, self.0)
206 write!(f, "{}", self)
211 impl fmt::Display for Symbol {
212 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
213 fmt::Display::fmt(&self.as_str(), f)
217 impl Encodable for Symbol {
218 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
219 s.emit_str(&self.as_str())
223 impl Decodable for Symbol {
224 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
225 Ok(Symbol::intern(&d.read_str()?))
229 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
230 fn eq(&self, other: &T) -> bool {
231 self.as_str() == other.deref()
235 // The `&'static str`s in this type actually point into the arena.
237 // Note that normal symbols are indexed upward from 0, and gensyms are indexed
238 // downward from SymbolIndex::MAX_AS_U32.
240 pub struct Interner {
241 arena: DroplessArena,
242 names: FxHashMap<&'static str, Symbol>,
243 strings: Vec<&'static str>,
244 gensyms: Vec<Symbol>,
248 fn prefill(init: &[&str]) -> Self {
249 let mut this = Interner::default();
250 for &string in init {
252 // We can't allocate empty strings in the arena, so handle this here.
253 let name = Symbol::new(this.strings.len() as u32);
254 this.names.insert("", name);
255 this.strings.push("");
263 pub fn intern(&mut self, string: &str) -> Symbol {
264 if let Some(&name) = self.names.get(string) {
268 let name = Symbol::new(self.strings.len() as u32);
270 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
272 let string: &str = unsafe {
273 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
275 // It is safe to extend the arena allocation to `'static` because we only access
276 // these while the arena is still alive.
277 let string: &'static str = unsafe {
278 &*(string as *const str)
280 self.strings.push(string);
281 self.names.insert(string, name);
285 pub fn interned(&self, symbol: Symbol) -> Symbol {
286 if (symbol.0.as_usize()) < self.strings.len() {
289 self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
293 fn gensym(&mut self, string: &str) -> Symbol {
294 let symbol = self.intern(string);
295 self.gensymed(symbol)
298 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
299 self.gensyms.push(symbol);
300 Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
303 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
304 symbol.0.as_usize() >= self.strings.len()
307 pub fn get(&self, symbol: Symbol) -> &str {
308 match self.strings.get(symbol.0.as_usize()) {
309 Some(string) => string,
310 None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
315 // In this macro, there is the requirement that the name (the number) must be monotonically
316 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
317 // except starting from the next number instead of zero.
318 macro_rules! declare_keywords {(
319 $( ($index: expr, $konst: ident, $string: expr) )*
322 use super::{Symbol, Ident};
323 #[derive(Clone, Copy, PartialEq, Eq)]
328 #[inline] pub fn ident(self) -> Ident { self.ident }
329 #[inline] pub fn name(self) -> Symbol { self.ident.name }
332 #[allow(non_upper_case_globals)]
333 pub const $konst: Keyword = Keyword {
334 ident: Ident::with_empty_ctxt(super::Symbol::new($index))
338 impl ::std::str::FromStr for Keyword {
341 fn from_str(s: &str) -> Result<Self, ()> {
343 $($string => Ok($konst),)*
351 pub fn fresh() -> Self {
352 Interner::prefill(&[$($string,)*])
357 // N.B., leaving holes in the ident table is bad! a different ident will get
358 // interned with the id from the hole, but it will be between the min and max
359 // of the reserved words, and thus tagged as "reserved".
360 // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`,
361 // this should be rarely necessary though if the keywords are kept in alphabetic order.
363 // Special reserved identifiers used internally for elided lifetimes,
364 // unnamed method parameters, crate root module, error recovery etc.
366 (1, PathRoot, "{{root}}")
367 (2, DollarCrate, "$crate")
370 // Keywords that are used in stable Rust.
375 (8, Continue, "continue")
379 (12, Extern, "extern")
394 (27, Return, "return")
395 (28, SelfLower, "self")
396 (29, SelfUpper, "Self")
397 (30, Static, "static")
398 (31, Struct, "struct")
403 (36, Unsafe, "unsafe")
408 // Keywords that are used in unstable Rust or reserved for future use.
409 (40, Abstract, "abstract")
410 (41, Become, "become")
414 (45, Override, "override")
416 (47, Typeof, "typeof")
417 (48, Unsized, "unsized")
418 (49, Virtual, "virtual")
421 // Edition-specific keywords that are used in stable Rust.
422 (51, Dyn, "dyn") // >= 2018 Edition only
424 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
425 (52, Async, "async") // >= 2018 Edition only
426 (53, Try, "try") // >= 2018 Edition only
428 // Special lifetime names
429 (54, UnderscoreLifetime, "'_")
430 (55, StaticLifetime, "'static")
432 // Weak keywords, have special meaning only in specific contexts.
435 (58, Default, "default")
436 (59, Existential, "existential")
441 fn is_used_keyword_2018(self) -> bool {
442 self == keywords::Dyn.name()
445 fn is_unused_keyword_2018(self) -> bool {
446 self >= keywords::Async.name() && self <= keywords::Try.name()
451 // Returns `true` for reserved identifiers used internally for elided lifetimes,
452 // unnamed method parameters, crate root module, error recovery etc.
453 pub fn is_special(self) -> bool {
454 self.name <= keywords::Underscore.name()
457 /// Returns `true` if the token is a keyword used in the language.
458 pub fn is_used_keyword(self) -> bool {
459 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
460 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
461 self.name.is_used_keyword_2018() && self.span.rust_2018()
464 /// Returns `true` if the token is a keyword reserved for possible future use.
465 pub fn is_unused_keyword(self) -> bool {
466 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
467 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
468 self.name.is_unused_keyword_2018() && self.span.rust_2018()
471 /// Returns `true` if the token is either a special identifier or a keyword.
472 pub fn is_reserved(self) -> bool {
473 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
476 /// A keyword or reserved identifier that can be used as a path segment.
477 pub fn is_path_segment_keyword(self) -> bool {
478 self.name == keywords::Super.name() ||
479 self.name == keywords::SelfLower.name() ||
480 self.name == keywords::SelfUpper.name() ||
481 self.name == keywords::Crate.name() ||
482 self.name == keywords::PathRoot.name() ||
483 self.name == keywords::DollarCrate.name()
486 // We see this identifier in a normal identifier position, like variable name or a type.
487 // How was it written originally? Did it use the raw form? Let's try to guess.
488 pub fn is_raw_guess(self) -> bool {
489 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
490 self.is_reserved() && !self.is_path_segment_keyword()
494 // If an interner exists, return it. Otherwise, prepare a fresh one.
496 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
497 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
500 /// Represents a string stored in the interner. Because the interner outlives any thread
501 /// which uses this type, we can safely treat `string` which points to interner data,
502 /// as an immortal string, as long as this type never crosses between threads.
503 // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
504 // by creating a new thread right after constructing the interner.
505 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
506 pub struct LocalInternedString {
507 string: &'static str,
510 impl LocalInternedString {
511 pub fn as_interned_str(self) -> InternedString {
513 symbol: Symbol::intern(self.string)
517 pub fn get(&self) -> &'static str {
522 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
524 str: ::std::convert::AsRef<U>
526 fn as_ref(&self) -> &U {
531 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
532 fn eq(&self, other: &T) -> bool {
533 self.string == other.deref()
537 impl ::std::cmp::PartialEq<LocalInternedString> for str {
538 fn eq(&self, other: &LocalInternedString) -> bool {
543 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
544 fn eq(&self, other: &LocalInternedString) -> bool {
545 *self == other.string
549 impl ::std::cmp::PartialEq<LocalInternedString> for String {
550 fn eq(&self, other: &LocalInternedString) -> bool {
555 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
556 fn eq(&self, other: &LocalInternedString) -> bool {
557 *self == other.string
561 impl !Send for LocalInternedString {}
562 impl !Sync for LocalInternedString {}
564 impl ::std::ops::Deref for LocalInternedString {
566 fn deref(&self) -> &str { self.string }
569 impl fmt::Debug for LocalInternedString {
570 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
571 fmt::Debug::fmt(self.string, f)
575 impl fmt::Display for LocalInternedString {
576 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
577 fmt::Display::fmt(self.string, f)
581 impl Decodable for LocalInternedString {
582 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
583 Ok(Symbol::intern(&d.read_str()?).as_str())
587 impl Encodable for LocalInternedString {
588 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
589 s.emit_str(self.string)
593 /// Represents a string stored in the string interner.
594 #[derive(Clone, Copy, Eq)]
595 pub struct InternedString {
599 impl InternedString {
600 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
601 let str = with_interner(|interner| {
602 interner.get(self.symbol) as *const str
604 // This is safe because the interner keeps string alive until it is dropped.
605 // We can access it because we know the interner is still alive since we use a
606 // scoped thread local to access it, and it was alive at the beginning of this scope
610 pub fn as_symbol(self) -> Symbol {
614 pub fn as_str(self) -> LocalInternedString {
619 impl Hash for InternedString {
620 fn hash<H: Hasher>(&self, state: &mut H) {
621 self.with(|str| str.hash(state))
625 impl PartialOrd<InternedString> for InternedString {
626 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
627 if self.symbol == other.symbol {
628 return Some(Ordering::Equal);
630 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
634 impl Ord for InternedString {
635 fn cmp(&self, other: &InternedString) -> Ordering {
636 if self.symbol == other.symbol {
637 return Ordering::Equal;
639 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
643 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
644 fn eq(&self, other: &T) -> bool {
645 self.with(|string| string == other.deref())
649 impl PartialEq<InternedString> for InternedString {
650 fn eq(&self, other: &InternedString) -> bool {
651 self.symbol == other.symbol
655 impl PartialEq<InternedString> for str {
656 fn eq(&self, other: &InternedString) -> bool {
657 other.with(|string| self == string)
661 impl<'a> PartialEq<InternedString> for &'a str {
662 fn eq(&self, other: &InternedString) -> bool {
663 other.with(|string| *self == string)
667 impl PartialEq<InternedString> for String {
668 fn eq(&self, other: &InternedString) -> bool {
669 other.with(|string| self == string)
673 impl<'a> PartialEq<InternedString> for &'a String {
674 fn eq(&self, other: &InternedString) -> bool {
675 other.with(|string| *self == string)
679 impl ::std::convert::From<InternedString> for String {
680 fn from(val: InternedString) -> String {
681 val.as_symbol().to_string()
685 impl fmt::Debug for InternedString {
686 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
687 self.with(|str| fmt::Debug::fmt(&str, f))
691 impl fmt::Display for InternedString {
692 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
693 self.with(|str| fmt::Display::fmt(&str, f))
697 impl Decodable for InternedString {
698 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
699 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
703 impl Encodable for InternedString {
704 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
705 self.with(|string| s.emit_str(string))
715 fn interner_tests() {
716 let mut i: Interner = Interner::default();
717 // first one is zero:
718 assert_eq!(i.intern("dog"), Symbol::new(0));
719 // re-use gets the same entry:
720 assert_eq!(i.intern("dog"), Symbol::new(0));
721 // different string gets a different #:
722 assert_eq!(i.intern("cat"), Symbol::new(1));
723 assert_eq!(i.intern("cat"), Symbol::new(1));
724 // dog is still at zero
725 assert_eq!(i.intern("dog"), Symbol::new(0));
726 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
727 // gensym of same string gets new number:
728 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
729 // gensym of *existing* string gets new number:
730 assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
734 fn without_first_quote_test() {
735 GLOBALS.set(&Globals::new(), || {
736 let i = Ident::from_str("'break");
737 assert_eq!(i.without_first_quote().name, keywords::Break.name());