1 //! An "interner" is a data structure that associates values with usize tags and
2 //! allows bidirectional lookup; i.e., given a value, one can easily find the
3 //! type, and vice versa.
5 use arena::DroplessArena;
6 use rustc_data_structures::fx::FxHashMap;
7 use rustc_data_structures::indexed_vec::Idx;
8 use rustc_data_structures::newtype_index;
9 use serialize::{Decodable, Decoder, Encodable, Encoder};
13 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
14 use std::hash::{Hash, Hasher};
16 use crate::hygiene::SyntaxContext;
17 use crate::{Span, DUMMY_SP, GLOBALS};
19 #[derive(Copy, Clone, Eq)]
27 pub const fn new(name: Symbol, span: Span) -> Ident {
32 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
33 Ident::new(name, DUMMY_SP)
36 /// Maps an interned string to an identifier with an empty syntax context.
37 pub fn from_interned_str(string: InternedString) -> Ident {
38 Ident::with_empty_ctxt(string.as_symbol())
41 /// Maps a string to an identifier with an empty syntax context.
42 pub fn from_str(string: &str) -> Ident {
43 Ident::with_empty_ctxt(Symbol::intern(string))
46 /// Replaces `lo` and `hi` with those from `span`, but keep hygiene context.
47 pub fn with_span_pos(self, span: Span) -> Ident {
48 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
51 pub fn without_first_quote(self) -> Ident {
52 Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span)
55 /// "Normalize" ident for use in comparisons using "item hygiene".
56 /// Identifiers with same string value become same if they came from the same "modern" macro
57 /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
58 /// different "modern" macros.
59 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
60 pub fn modern(self) -> Ident {
61 Ident::new(self.name, self.span.modern())
64 /// "Normalize" ident for use in comparisons using "local variable hygiene".
65 /// Identifiers with same string value become same if they came from the same non-transparent
66 /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
67 /// non-transparent macros.
68 /// Technically, this operation strips all transparent marks from ident's syntactic context.
69 pub fn modern_and_legacy(self) -> Ident {
70 Ident::new(self.name, self.span.modern_and_legacy())
73 pub fn gensym(self) -> Ident {
74 Ident::new(self.name.gensymed(), self.span)
77 pub fn gensym_if_underscore(self) -> Ident {
78 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
81 pub fn as_str(self) -> LocalInternedString {
85 pub fn as_interned_str(self) -> InternedString {
86 self.name.as_interned_str()
90 impl PartialEq for Ident {
91 fn eq(&self, rhs: &Self) -> bool {
92 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
97 fn hash<H: Hasher>(&self, state: &mut H) {
98 self.name.hash(state);
99 self.span.ctxt().hash(state);
103 impl fmt::Debug for Ident {
104 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105 write!(f, "{}{:?}", self.name, self.span.ctxt())
109 impl fmt::Display for Ident {
110 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111 fmt::Display::fmt(&self.name, f)
115 impl Encodable for Ident {
116 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
117 if self.span.ctxt().modern() == SyntaxContext::empty() {
118 s.emit_str(&self.as_str())
119 } else { // FIXME(jseyfried): intercrate hygiene
120 let mut string = "#".to_owned();
121 string.push_str(&self.as_str());
127 impl Decodable for Ident {
128 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
129 let string = d.read_str()?;
130 Ok(if !string.starts_with('#') {
131 Ident::from_str(&string)
132 } else { // FIXME(jseyfried): intercrate hygiene
133 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
138 /// A symbol is an interned or gensymed string. The use of `newtype_index!` means
139 /// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index! reserves
140 /// the last 256 values for tagging purposes.
142 /// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
143 /// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
144 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
145 pub struct Symbol(SymbolIndex);
148 pub struct SymbolIndex { .. }
151 // The interner is pointed to by a thread local value which is only set on the main thread
152 // with parallelization is disabled. So we don't allow `Symbol` to transfer between threads
153 // to avoid panics and other errors, even though it would be memory safe to do so.
154 #[cfg(not(parallel_compiler))]
155 impl !Send for Symbol { }
156 #[cfg(not(parallel_compiler))]
157 impl !Sync for Symbol { }
160 const fn new(n: u32) -> Self {
161 Symbol(SymbolIndex::from_u32_const(n))
164 /// Maps a string to its interned representation.
165 pub fn intern(string: &str) -> Self {
166 with_interner(|interner| interner.intern(string))
169 pub fn interned(self) -> Self {
170 with_interner(|interner| interner.interned(self))
173 /// Gensyms a new `usize`, using the current interner.
174 pub fn gensym(string: &str) -> Self {
175 with_interner(|interner| interner.gensym(string))
178 pub fn gensymed(self) -> Self {
179 with_interner(|interner| interner.gensymed(self))
182 pub fn as_str(self) -> LocalInternedString {
183 with_interner(|interner| unsafe {
184 LocalInternedString {
185 string: std::mem::transmute::<&str, &str>(interner.get(self))
190 pub fn as_interned_str(self) -> InternedString {
191 with_interner(|interner| InternedString {
192 symbol: interner.interned(self)
196 pub fn as_u32(self) -> u32 {
201 impl fmt::Debug for Symbol {
202 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
205 write!(f, "{}({:?})", self, self.0)
207 write!(f, "{}", self)
212 impl fmt::Display for Symbol {
213 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214 fmt::Display::fmt(&self.as_str(), f)
218 impl Encodable for Symbol {
219 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
220 s.emit_str(&self.as_str())
224 impl Decodable for Symbol {
225 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
226 Ok(Symbol::intern(&d.read_str()?))
230 impl<T: std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
231 fn eq(&self, other: &T) -> bool {
232 self.as_str() == other.deref()
236 // The `&'static str`s in this type actually point into the arena.
238 // Note that normal symbols are indexed upward from 0, and gensyms are indexed
239 // downward from SymbolIndex::MAX_AS_U32.
241 pub struct Interner {
242 arena: DroplessArena,
243 names: FxHashMap<&'static str, Symbol>,
244 strings: Vec<&'static str>,
245 gensyms: Vec<Symbol>,
249 fn prefill(init: &[&str]) -> Self {
250 let mut this = Interner::default();
251 for &string in init {
253 // We can't allocate empty strings in the arena, so handle this here.
254 let name = Symbol::new(this.strings.len() as u32);
255 this.names.insert("", name);
256 this.strings.push("");
264 pub fn intern(&mut self, string: &str) -> Symbol {
265 if let Some(&name) = self.names.get(string) {
269 let name = Symbol::new(self.strings.len() as u32);
271 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
273 let string: &str = unsafe {
274 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
276 // It is safe to extend the arena allocation to `'static` because we only access
277 // these while the arena is still alive.
278 let string: &'static str = unsafe {
279 &*(string as *const str)
281 self.strings.push(string);
282 self.names.insert(string, name);
286 pub fn interned(&self, symbol: Symbol) -> Symbol {
287 if (symbol.0.as_usize()) < self.strings.len() {
290 self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
294 fn gensym(&mut self, string: &str) -> Symbol {
295 let symbol = self.intern(string);
296 self.gensymed(symbol)
299 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
300 self.gensyms.push(symbol);
301 Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
304 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
305 symbol.0.as_usize() >= self.strings.len()
308 pub fn get(&self, symbol: Symbol) -> &str {
309 match self.strings.get(symbol.0.as_usize()) {
310 Some(string) => string,
311 None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
316 // In this macro, there is the requirement that the name (the number) must be monotonically
317 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
318 // except starting from the next number instead of zero.
319 macro_rules! declare_keywords {(
320 $( ($index: expr, $konst: ident, $string: expr) )*
323 use super::{Symbol, Ident};
324 #[derive(Clone, Copy, PartialEq, Eq)]
329 #[inline] pub fn ident(self) -> Ident { self.ident }
330 #[inline] pub fn name(self) -> Symbol { self.ident.name }
333 #[allow(non_upper_case_globals)]
334 pub const $konst: Keyword = Keyword {
335 ident: Ident::with_empty_ctxt(super::Symbol::new($index))
339 impl std::str::FromStr for Keyword {
342 fn from_str(s: &str) -> Result<Self, ()> {
344 $($string => Ok($konst),)*
352 pub fn fresh() -> Self {
353 Interner::prefill(&[$($string,)*])
358 // N.B., leaving holes in the ident table is bad! a different ident will get
359 // interned with the id from the hole, but it will be between the min and max
360 // of the reserved words, and thus tagged as "reserved".
361 // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`,
362 // this should be rarely necessary though if the keywords are kept in alphabetic order.
364 // Special reserved identifiers used internally for elided lifetimes,
365 // unnamed method parameters, crate root module, error recovery etc.
367 (1, PathRoot, "{{root}}")
368 (2, DollarCrate, "$crate")
371 // Keywords that are used in stable Rust.
376 (8, Continue, "continue")
380 (12, Extern, "extern")
395 (27, Return, "return")
396 (28, SelfLower, "self")
397 (29, SelfUpper, "Self")
398 (30, Static, "static")
399 (31, Struct, "struct")
404 (36, Unsafe, "unsafe")
409 // Keywords that are used in unstable Rust or reserved for future use.
410 (40, Abstract, "abstract")
411 (41, Become, "become")
415 (45, Override, "override")
417 (47, Typeof, "typeof")
418 (48, Unsized, "unsized")
419 (49, Virtual, "virtual")
422 // Edition-specific keywords that are used in stable Rust.
423 (51, Dyn, "dyn") // >= 2018 Edition only
425 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
426 (52, Async, "async") // >= 2018 Edition only
427 (53, Try, "try") // >= 2018 Edition only
429 // Special lifetime names
430 (54, UnderscoreLifetime, "'_")
431 (55, StaticLifetime, "'static")
433 // Weak keywords, have special meaning only in specific contexts.
436 (58, Default, "default")
437 (59, Existential, "existential")
442 fn is_used_keyword_2018(self) -> bool {
443 self == keywords::Dyn.name()
446 fn is_unused_keyword_2018(self) -> bool {
447 self >= keywords::Async.name() && self <= keywords::Try.name()
452 // Returns `true` for reserved identifiers used internally for elided lifetimes,
453 // unnamed method parameters, crate root module, error recovery etc.
454 pub fn is_special(self) -> bool {
455 self.name <= keywords::Underscore.name()
458 /// Returns `true` if the token is a keyword used in the language.
459 pub fn is_used_keyword(self) -> bool {
460 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
461 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
462 self.name.is_used_keyword_2018() && self.span.rust_2018()
465 /// Returns `true` if the token is a keyword reserved for possible future use.
466 pub fn is_unused_keyword(self) -> bool {
467 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
468 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
469 self.name.is_unused_keyword_2018() && self.span.rust_2018()
472 /// Returns `true` if the token is either a special identifier or a keyword.
473 pub fn is_reserved(self) -> bool {
474 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
477 /// A keyword or reserved identifier that can be used as a path segment.
478 pub fn is_path_segment_keyword(self) -> bool {
479 self.name == keywords::Super.name() ||
480 self.name == keywords::SelfLower.name() ||
481 self.name == keywords::SelfUpper.name() ||
482 self.name == keywords::Crate.name() ||
483 self.name == keywords::PathRoot.name() ||
484 self.name == keywords::DollarCrate.name()
487 /// This identifier can be a raw identifier.
488 pub fn can_be_raw(self) -> bool {
489 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
490 !self.is_path_segment_keyword()
493 /// We see this identifier in a normal identifier position, like variable name or a type.
494 /// How was it written originally? Did it use the raw form? Let's try to guess.
495 pub fn is_raw_guess(self) -> bool {
496 self.can_be_raw() && self.is_reserved()
500 // If an interner exists, return it. Otherwise, prepare a fresh one.
502 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
503 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
506 /// Represents a string stored in the interner. Because the interner outlives any thread
507 /// which uses this type, we can safely treat `string` which points to interner data,
508 /// as an immortal string, as long as this type never crosses between threads.
509 // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
510 // by creating a new thread right after constructing the interner.
511 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
512 pub struct LocalInternedString {
513 string: &'static str,
516 impl LocalInternedString {
517 pub fn as_interned_str(self) -> InternedString {
519 symbol: Symbol::intern(self.string)
523 pub fn get(&self) -> &'static str {
528 impl<U: ?Sized> std::convert::AsRef<U> for LocalInternedString
530 str: std::convert::AsRef<U>
532 fn as_ref(&self) -> &U {
537 impl<T: std::ops::Deref<Target = str>> std::cmp::PartialEq<T> for LocalInternedString {
538 fn eq(&self, other: &T) -> bool {
539 self.string == other.deref()
543 impl std::cmp::PartialEq<LocalInternedString> for str {
544 fn eq(&self, other: &LocalInternedString) -> bool {
549 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a str {
550 fn eq(&self, other: &LocalInternedString) -> bool {
551 *self == other.string
555 impl std::cmp::PartialEq<LocalInternedString> for String {
556 fn eq(&self, other: &LocalInternedString) -> bool {
561 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a String {
562 fn eq(&self, other: &LocalInternedString) -> bool {
563 *self == other.string
567 impl !Send for LocalInternedString {}
568 impl !Sync for LocalInternedString {}
570 impl std::ops::Deref for LocalInternedString {
572 fn deref(&self) -> &str { self.string }
575 impl fmt::Debug for LocalInternedString {
576 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
577 fmt::Debug::fmt(self.string, f)
581 impl fmt::Display for LocalInternedString {
582 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
583 fmt::Display::fmt(self.string, f)
587 impl Decodable for LocalInternedString {
588 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
589 Ok(Symbol::intern(&d.read_str()?).as_str())
593 impl Encodable for LocalInternedString {
594 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
595 s.emit_str(self.string)
599 /// Represents a string stored in the string interner.
600 #[derive(Clone, Copy, Eq)]
601 pub struct InternedString {
605 impl InternedString {
606 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
607 let str = with_interner(|interner| {
608 interner.get(self.symbol) as *const str
610 // This is safe because the interner keeps string alive until it is dropped.
611 // We can access it because we know the interner is still alive since we use a
612 // scoped thread local to access it, and it was alive at the beginning of this scope
616 pub fn as_symbol(self) -> Symbol {
620 pub fn as_str(self) -> LocalInternedString {
625 impl Hash for InternedString {
626 fn hash<H: Hasher>(&self, state: &mut H) {
627 self.with(|str| str.hash(state))
631 impl PartialOrd<InternedString> for InternedString {
632 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
633 if self.symbol == other.symbol {
634 return Some(Ordering::Equal);
636 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
640 impl Ord for InternedString {
641 fn cmp(&self, other: &InternedString) -> Ordering {
642 if self.symbol == other.symbol {
643 return Ordering::Equal;
645 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
649 impl<T: std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
650 fn eq(&self, other: &T) -> bool {
651 self.with(|string| string == other.deref())
655 impl PartialEq<InternedString> for InternedString {
656 fn eq(&self, other: &InternedString) -> bool {
657 self.symbol == other.symbol
661 impl PartialEq<InternedString> for str {
662 fn eq(&self, other: &InternedString) -> bool {
663 other.with(|string| self == string)
667 impl<'a> PartialEq<InternedString> for &'a str {
668 fn eq(&self, other: &InternedString) -> bool {
669 other.with(|string| *self == string)
673 impl PartialEq<InternedString> for String {
674 fn eq(&self, other: &InternedString) -> bool {
675 other.with(|string| self == string)
679 impl<'a> PartialEq<InternedString> for &'a String {
680 fn eq(&self, other: &InternedString) -> bool {
681 other.with(|string| *self == string)
685 impl std::convert::From<InternedString> for String {
686 fn from(val: InternedString) -> String {
687 val.as_symbol().to_string()
691 impl fmt::Debug for InternedString {
692 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
693 self.with(|str| fmt::Debug::fmt(&str, f))
697 impl fmt::Display for InternedString {
698 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
699 self.with(|str| fmt::Display::fmt(&str, f))
703 impl Decodable for InternedString {
704 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
705 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
709 impl Encodable for InternedString {
710 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
711 self.with(|string| s.emit_str(string))
721 fn interner_tests() {
722 let mut i: Interner = Interner::default();
723 // first one is zero:
724 assert_eq!(i.intern("dog"), Symbol::new(0));
725 // re-use gets the same entry:
726 assert_eq!(i.intern("dog"), Symbol::new(0));
727 // different string gets a different #:
728 assert_eq!(i.intern("cat"), Symbol::new(1));
729 assert_eq!(i.intern("cat"), Symbol::new(1));
730 // dog is still at zero
731 assert_eq!(i.intern("dog"), Symbol::new(0));
732 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
733 // gensym of same string gets new number:
734 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
735 // gensym of *existing* string gets new number:
736 assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
740 fn without_first_quote_test() {
741 GLOBALS.set(&Globals::new(), || {
742 let i = Ident::from_str("'break");
743 assert_eq!(i.without_first_quote().name, keywords::Break.name());