//! An "interner" is a data structure that associates values with usize tags and
//! allows bidirectional lookup; i.e., given a value, one can easily find the
//! tag, and vice versa.
5 use arena::DroplessArena;
6 use rustc_data_structures::fx::FxHashMap;
7 use rustc_data_structures::indexed_vec::Idx;
8 use rustc_data_structures::newtype_index;
9 use rustc_macros::symbols;
10 use serialize::{Decodable, Decoder, Encodable, Encoder};
14 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
15 use std::hash::{Hash, Hasher};
17 use crate::hygiene::SyntaxContext;
18 use crate::{Span, DUMMY_SP, GLOBALS};
// After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
// this should rarely be necessary, though, if the keywords are kept in alphabetic order.
24 // Special reserved identifiers used internally for elided lifetimes,
25 // unnamed method parameters, crate root module, error recovery etc.
28 DollarCrate: "$crate",
31 // Keywords that are used in stable Rust.
69 // Keywords that are used in unstable Rust or reserved for future use.
82 // Edition-specific keywords that are used in stable Rust.
83 Dyn: "dyn", // >= 2018 Edition only
85 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
86 Async: "async", // >= 2018 Edition only
87 Await: "await", // >= 2018 Edition only
88 Try: "try", // >= 2018 Edition only
90 // Special lifetime names
91 UnderscoreLifetime: "'_",
92 StaticLifetime: "'static",
94 // Weak keywords, have special meaning only in specific contexts.
98 Existential: "existential",
102 // Other symbols that can be referred to with syntax_pos::symbols::*
109 allow_internal_unsafe,
110 allow_internal_unstable,
111 automatically_derived,
119 default_lib_allocator,
161 omit_gdb_pretty_printer_section,
170 proc_macro_attribute,
174 reexport_test_harness_main,
176 rustc_args_required_const,
178 rustc_const_unstable,
179 rustc_conversion_suggestion,
180 rustc_copy_clone_marker,
184 rustc_dump_program_clauses,
185 rustc_dump_user_substs,
187 rustc_expected_cgu_reuse,
188 rustc_if_this_changed,
189 rustc_inherit_overflow_checks,
191 rustc_layout_scalar_valid_range_end,
192 rustc_layout_scalar_valid_range_start,
194 rustc_on_unimplemented,
197 rustc_partition_codegened,
198 rustc_partition_reused,
199 rustc_proc_macro_decls,
201 rustc_std_internal_symbol,
205 rustc_then_this_would_need,
206 rustc_transparent_macro,
219 unsafe_destructor_blind_to_params,
228 #[derive(Copy, Clone, Eq)]
236 pub const fn new(name: Symbol, span: Span) -> Ident {
241 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
242 Ident::new(name, DUMMY_SP)
245 /// Maps an interned string to an identifier with an empty syntax context.
246 pub fn from_interned_str(string: InternedString) -> Ident {
247 Ident::with_empty_ctxt(string.as_symbol())
250 /// Maps a string to an identifier with an empty syntax context.
251 pub fn from_str(string: &str) -> Ident {
252 Ident::with_empty_ctxt(Symbol::intern(string))
255 /// Replaces `lo` and `hi` with those from `span`, but keep hygiene context.
256 pub fn with_span_pos(self, span: Span) -> Ident {
257 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
260 pub fn without_first_quote(self) -> Ident {
261 Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span)
264 /// "Normalize" ident for use in comparisons using "item hygiene".
265 /// Identifiers with same string value become same if they came from the same "modern" macro
266 /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
267 /// different "modern" macros.
268 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
269 pub fn modern(self) -> Ident {
270 Ident::new(self.name, self.span.modern())
273 /// "Normalize" ident for use in comparisons using "local variable hygiene".
274 /// Identifiers with same string value become same if they came from the same non-transparent
275 /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
276 /// non-transparent macros.
277 /// Technically, this operation strips all transparent marks from ident's syntactic context.
278 pub fn modern_and_legacy(self) -> Ident {
279 Ident::new(self.name, self.span.modern_and_legacy())
282 pub fn gensym(self) -> Ident {
283 Ident::new(self.name.gensymed(), self.span)
286 pub fn gensym_if_underscore(self) -> Ident {
287 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
290 pub fn as_str(self) -> LocalInternedString {
294 pub fn as_interned_str(self) -> InternedString {
295 self.name.as_interned_str()
299 impl PartialEq for Ident {
300 fn eq(&self, rhs: &Self) -> bool {
301 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
305 impl Hash for Ident {
306 fn hash<H: Hasher>(&self, state: &mut H) {
307 self.name.hash(state);
308 self.span.ctxt().hash(state);
312 impl fmt::Debug for Ident {
313 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
314 write!(f, "{}{:?}", self.name, self.span.ctxt())
318 impl fmt::Display for Ident {
319 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
320 fmt::Display::fmt(&self.name, f)
324 impl Encodable for Ident {
325 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
326 if self.span.ctxt().modern() == SyntaxContext::empty() {
327 s.emit_str(&self.as_str())
328 } else { // FIXME(jseyfried): intercrate hygiene
329 let mut string = "#".to_owned();
330 string.push_str(&self.as_str());
336 impl Decodable for Ident {
337 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
338 let string = d.read_str()?;
339 Ok(if !string.starts_with('#') {
340 Ident::from_str(&string)
341 } else { // FIXME(jseyfried): intercrate hygiene
342 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
347 /// A symbol is an interned or gensymed string. A gensym is a symbol that is
348 /// never equal to any other symbol. E.g.:
350 /// assert_eq!(Symbol::intern("x"), Symbol::intern("x"))
351 /// assert_ne!(Symbol::gensym("x"), Symbol::intern("x"))
352 /// assert_ne!(Symbol::gensym("x"), Symbol::gensym("x"))
354 /// Conceptually, a gensym can be thought of as a normal symbol with an
355 /// invisible unique suffix. Gensyms are useful when creating new identifiers
356 /// that must not match any existing identifiers, e.g. during macro expansion
357 /// and syntax desugaring.
359 /// Internally, a Symbol is implemented as an index, and all operations
360 /// (including hashing, equality, and ordering) operate on that index. The use
361 /// of `newtype_index!` means that `Option<Symbol>` only takes up 4 bytes,
362 /// because `newtype_index!` reserves the last 256 values for tagging purposes.
364 /// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
365 /// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
366 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
367 pub struct Symbol(SymbolIndex);
370 pub struct SymbolIndex { .. }
374 const fn new(n: u32) -> Self {
375 Symbol(SymbolIndex::from_u32_const(n))
378 /// Maps a string to its interned representation.
379 pub fn intern(string: &str) -> Self {
380 with_interner(|interner| interner.intern(string))
383 pub fn interned(self) -> Self {
384 with_interner(|interner| interner.interned(self))
387 /// Gensyms a new `usize`, using the current interner.
388 pub fn gensym(string: &str) -> Self {
389 with_interner(|interner| interner.gensym(string))
392 pub fn gensymed(self) -> Self {
393 with_interner(|interner| interner.gensymed(self))
396 // WARNING: this function is deprecated and will be removed in the future.
397 pub fn is_gensymed(self) -> bool {
398 with_interner(|interner| interner.is_gensymed(self))
401 pub fn as_str(self) -> LocalInternedString {
402 with_interner(|interner| unsafe {
403 LocalInternedString {
404 string: std::mem::transmute::<&str, &str>(interner.get(self))
409 pub fn as_interned_str(self) -> InternedString {
410 with_interner(|interner| InternedString {
411 symbol: interner.interned(self)
415 pub fn as_u32(self) -> u32 {
420 impl fmt::Debug for Symbol {
421 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
422 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
424 write!(f, "{}({:?})", self, self.0)
426 write!(f, "{}", self)
431 impl fmt::Display for Symbol {
432 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
433 fmt::Display::fmt(&self.as_str(), f)
437 impl Encodable for Symbol {
438 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
439 s.emit_str(&self.as_str())
443 impl Decodable for Symbol {
444 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
445 Ok(Symbol::intern(&d.read_str()?))
449 impl<T: std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
450 fn eq(&self, other: &T) -> bool {
451 self.as_str() == other.deref()
455 // The `&'static str`s in this type actually point into the arena.
457 // Note that normal symbols are indexed upward from 0, and gensyms are indexed
458 // downward from SymbolIndex::MAX_AS_U32.
460 pub struct Interner {
461 arena: DroplessArena,
462 names: FxHashMap<&'static str, Symbol>,
463 strings: Vec<&'static str>,
464 gensyms: Vec<Symbol>,
468 fn prefill(init: &[&str]) -> Self {
469 let mut this = Interner::default();
470 for &string in init {
472 // We can't allocate empty strings in the arena, so handle this here.
473 let name = Symbol::new(this.strings.len() as u32);
474 this.names.insert("", name);
475 this.strings.push("");
483 pub fn intern(&mut self, string: &str) -> Symbol {
484 if let Some(&name) = self.names.get(string) {
488 let name = Symbol::new(self.strings.len() as u32);
490 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
492 let string: &str = unsafe {
493 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
495 // It is safe to extend the arena allocation to `'static` because we only access
496 // these while the arena is still alive.
497 let string: &'static str = unsafe {
498 &*(string as *const str)
500 self.strings.push(string);
501 self.names.insert(string, name);
505 pub fn interned(&self, symbol: Symbol) -> Symbol {
506 if (symbol.0.as_usize()) < self.strings.len() {
509 self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]
513 fn gensym(&mut self, string: &str) -> Symbol {
514 let symbol = self.intern(string);
515 self.gensymed(symbol)
518 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
519 self.gensyms.push(symbol);
520 Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
523 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
524 symbol.0.as_usize() >= self.strings.len()
527 // Get the symbol as a string. `Symbol::as_str()` should be used in
528 // preference to this function.
529 pub fn get(&self, symbol: Symbol) -> &str {
530 match self.strings.get(symbol.0.as_usize()) {
531 Some(string) => string,
533 let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize];
534 self.strings[symbol.0.as_usize()]
541 use super::{Symbol, Ident};
543 #[derive(Clone, Copy, PartialEq, Eq)]
550 pub fn ident(self) -> Ident {
555 pub fn name(self) -> Symbol {
569 fn is_used_keyword_2018(self) -> bool {
570 self == keywords::Dyn.name()
573 fn is_unused_keyword_2018(self) -> bool {
574 self >= keywords::Async.name() && self <= keywords::Try.name()
579 // Returns `true` for reserved identifiers used internally for elided lifetimes,
580 // unnamed method parameters, crate root module, error recovery etc.
581 pub fn is_special(self) -> bool {
582 self.name <= keywords::Underscore.name()
585 /// Returns `true` if the token is a keyword used in the language.
586 pub fn is_used_keyword(self) -> bool {
587 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
588 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
589 self.name.is_used_keyword_2018() && self.span.rust_2018()
592 /// Returns `true` if the token is a keyword reserved for possible future use.
593 pub fn is_unused_keyword(self) -> bool {
594 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
595 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
596 self.name.is_unused_keyword_2018() && self.span.rust_2018()
599 /// Returns `true` if the token is either a special identifier or a keyword.
600 pub fn is_reserved(self) -> bool {
601 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
604 /// A keyword or reserved identifier that can be used as a path segment.
605 pub fn is_path_segment_keyword(self) -> bool {
606 self.name == keywords::Super.name() ||
607 self.name == keywords::SelfLower.name() ||
608 self.name == keywords::SelfUpper.name() ||
609 self.name == keywords::Crate.name() ||
610 self.name == keywords::PathRoot.name() ||
611 self.name == keywords::DollarCrate.name()
614 /// This identifier can be a raw identifier.
615 pub fn can_be_raw(self) -> bool {
616 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
617 !self.is_path_segment_keyword()
620 /// We see this identifier in a normal identifier position, like variable name or a type.
621 /// How was it written originally? Did it use the raw form? Let's try to guess.
622 pub fn is_raw_guess(self) -> bool {
623 self.can_be_raw() && self.is_reserved()
627 // If an interner exists, return it. Otherwise, prepare a fresh one.
629 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
630 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
633 /// An alternative to `Symbol` and `InternedString`, useful when the chars
634 /// within the symbol need to be accessed. It is best used for temporary
637 /// Because the interner outlives any thread which uses this type, we can
638 /// safely treat `string` which points to interner data, as an immortal string,
639 /// as long as this type never crosses between threads.
641 // FIXME: ensure that the interner outlives any thread which uses
642 // `LocalInternedString`, by creating a new thread right after constructing the
644 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
645 pub struct LocalInternedString {
646 string: &'static str,
649 impl LocalInternedString {
650 pub fn as_interned_str(self) -> InternedString {
652 symbol: Symbol::intern(self.string)
656 pub fn get(&self) -> &str {
// This returns a valid string since we ensure that the interner outlives `self`
// by creating the interner on a thread which outlives threads which can access it.
// This type cannot move to a thread which outlives the interner since it does
// not implement Send.
665 impl<U: ?Sized> std::convert::AsRef<U> for LocalInternedString
667 str: std::convert::AsRef<U>
669 fn as_ref(&self) -> &U {
674 impl<T: std::ops::Deref<Target = str>> std::cmp::PartialEq<T> for LocalInternedString {
675 fn eq(&self, other: &T) -> bool {
676 self.string == other.deref()
680 impl std::cmp::PartialEq<LocalInternedString> for str {
681 fn eq(&self, other: &LocalInternedString) -> bool {
686 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a str {
687 fn eq(&self, other: &LocalInternedString) -> bool {
688 *self == other.string
692 impl std::cmp::PartialEq<LocalInternedString> for String {
693 fn eq(&self, other: &LocalInternedString) -> bool {
698 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a String {
699 fn eq(&self, other: &LocalInternedString) -> bool {
700 *self == other.string
704 impl !Send for LocalInternedString {}
705 impl !Sync for LocalInternedString {}
707 impl std::ops::Deref for LocalInternedString {
709 fn deref(&self) -> &str { self.string }
712 impl fmt::Debug for LocalInternedString {
713 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
714 fmt::Debug::fmt(self.string, f)
718 impl fmt::Display for LocalInternedString {
719 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
720 fmt::Display::fmt(self.string, f)
724 impl Decodable for LocalInternedString {
725 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
726 Ok(Symbol::intern(&d.read_str()?).as_str())
730 impl Encodable for LocalInternedString {
731 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
732 s.emit_str(self.string)
736 /// An alternative to `Symbol` that is focused on string contents. It has two
737 /// main differences to `Symbol`.
739 /// First, its implementations of `Hash`, `PartialOrd` and `Ord` work with the
740 /// string chars rather than the symbol integer. This is useful when hash
741 /// stability is required across compile sessions, or a guaranteed sort
742 /// ordering is required.
744 /// Second, gensym-ness is irrelevant. E.g.:
746 /// assert_ne!(Symbol::gensym("x"), Symbol::gensym("x"))
747 /// assert_eq!(Symbol::gensym("x").as_interned_str(), Symbol::gensym("x").as_interned_str())
749 #[derive(Clone, Copy, Eq)]
750 pub struct InternedString {
754 impl InternedString {
755 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
756 let str = with_interner(|interner| {
757 interner.get(self.symbol) as *const str
759 // This is safe because the interner keeps string alive until it is dropped.
760 // We can access it because we know the interner is still alive since we use a
761 // scoped thread local to access it, and it was alive at the beginning of this scope
765 fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
766 let (self_str, other_str) = with_interner(|interner| {
767 (interner.get(self.symbol) as *const str,
768 interner.get(other.symbol) as *const str)
770 // This is safe for the same reason that `with` is safe.
771 unsafe { f(&*self_str, &*other_str) }
774 pub fn as_symbol(self) -> Symbol {
778 pub fn as_str(self) -> LocalInternedString {
783 impl Hash for InternedString {
784 fn hash<H: Hasher>(&self, state: &mut H) {
785 self.with(|str| str.hash(state))
789 impl PartialOrd<InternedString> for InternedString {
790 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
791 if self.symbol == other.symbol {
792 return Some(Ordering::Equal);
794 self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str))
798 impl Ord for InternedString {
799 fn cmp(&self, other: &InternedString) -> Ordering {
800 if self.symbol == other.symbol {
801 return Ordering::Equal;
803 self.with2(other, |self_str, other_str| self_str.cmp(other_str))
807 impl<T: std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
808 fn eq(&self, other: &T) -> bool {
809 self.with(|string| string == other.deref())
813 impl PartialEq<InternedString> for InternedString {
814 fn eq(&self, other: &InternedString) -> bool {
815 self.symbol == other.symbol
819 impl PartialEq<InternedString> for str {
820 fn eq(&self, other: &InternedString) -> bool {
821 other.with(|string| self == string)
825 impl<'a> PartialEq<InternedString> for &'a str {
826 fn eq(&self, other: &InternedString) -> bool {
827 other.with(|string| *self == string)
831 impl PartialEq<InternedString> for String {
832 fn eq(&self, other: &InternedString) -> bool {
833 other.with(|string| self == string)
837 impl<'a> PartialEq<InternedString> for &'a String {
838 fn eq(&self, other: &InternedString) -> bool {
839 other.with(|string| *self == string)
843 impl fmt::Debug for InternedString {
844 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
845 self.with(|str| fmt::Debug::fmt(&str, f))
849 impl fmt::Display for InternedString {
850 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
851 self.with(|str| fmt::Display::fmt(&str, f))
855 impl Decodable for InternedString {
856 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
857 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
861 impl Encodable for InternedString {
862 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
863 self.with(|string| s.emit_str(string))
873 fn interner_tests() {
874 let mut i: Interner = Interner::default();
875 // first one is zero:
876 assert_eq!(i.intern("dog"), Symbol::new(0));
877 // re-use gets the same entry:
878 assert_eq!(i.intern("dog"), Symbol::new(0));
879 // different string gets a different #:
880 assert_eq!(i.intern("cat"), Symbol::new(1));
881 assert_eq!(i.intern("cat"), Symbol::new(1));
882 // dog is still at zero
883 assert_eq!(i.intern("dog"), Symbol::new(0));
884 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
885 // gensym of same string gets new number:
886 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
887 // gensym of *existing* string gets new number:
888 assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
892 fn without_first_quote_test() {
893 GLOBALS.set(&Globals::new(), || {
894 let i = Ident::from_str("'break");
895 assert_eq!(i.without_first_quote().name, keywords::Break.name());