1 //! An "interner" is a data structure that associates values with usize tags and
2 //! allows bidirectional lookup; i.e., given a value, one can easily find the
3 //! tag, and vice versa.
5 use arena::DroplessArena;
6 use rustc_data_structures::fx::FxHashMap;
7 use rustc_data_structures::indexed_vec::Idx;
8 use rustc_data_structures::newtype_index;
9 use rustc_macros::symbols;
10 use serialize::{Decodable, Decoder, Encodable, Encoder};
14 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
15 use std::hash::{Hash, Hasher};
17 use crate::hygiene::SyntaxContext;
18 use crate::{Span, DUMMY_SP, GLOBALS};
21 // After modifying this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`;
22 // this should rarely be necessary, though, if the keywords are kept in alphabetical order.
24 // Special reserved identifiers used internally for elided lifetimes,
25 // unnamed method parameters, crate root module, error recovery etc.
28 DollarCrate: "$crate",
31 // Keywords that are used in stable Rust.
69 // Keywords that are used in unstable Rust or reserved for future use.
82 // Edition-specific keywords that are used in stable Rust.
83 Dyn: "dyn", // >= 2018 Edition only
85 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
86 Async: "async", // >= 2018 Edition only
87 Await: "await", // >= 2018 Edition only
88 Try: "try", // >= 2018 Edition only
90 // Special lifetime names
91 UnderscoreLifetime: "'_",
92 StaticLifetime: "'static",
94 // Weak keywords, have special meaning only in specific contexts.
98 Existential: "existential",
102 // Other symbols that can be referred to with syntax_pos::symbols::*
109 allow_internal_unsafe,
110 allow_internal_unstable,
111 automatically_derived,
119 default_lib_allocator,
161 omit_gdb_pretty_printer_section,
170 proc_macro_attribute,
174 reexport_test_harness_main,
176 rustc_args_required_const,
178 rustc_const_unstable,
179 rustc_conversion_suggestion,
180 rustc_copy_clone_marker,
184 rustc_dump_program_clauses,
185 rustc_dump_user_substs,
187 rustc_expected_cgu_reuse,
188 rustc_if_this_changed,
189 rustc_inherit_overflow_checks,
191 rustc_layout_scalar_valid_range_end,
192 rustc_layout_scalar_valid_range_start,
194 rustc_on_unimplemented,
197 rustc_partition_codegened,
198 rustc_partition_reused,
199 rustc_proc_macro_decls,
201 rustc_std_internal_symbol,
205 rustc_then_this_would_need,
206 rustc_transparent_macro,
219 unsafe_destructor_blind_to_params,
228 #[derive(Copy, Clone, Eq)]
236 pub const fn new(name: Symbol, span: Span) -> Ident {
241 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
242 Ident::new(name, DUMMY_SP)
245 /// Maps an interned string to an identifier with an empty syntax context.
246 pub fn from_interned_str(string: InternedString) -> Ident {
247 Ident::with_empty_ctxt(string.as_symbol())
250 /// Maps a string to an identifier with an empty syntax context.
251 pub fn from_str(string: &str) -> Ident {
252 Ident::with_empty_ctxt(Symbol::intern(string))
255 /// Replaces `lo` and `hi` with those from `span`, but keeps the hygiene context.
256 pub fn with_span_pos(self, span: Span) -> Ident {
257 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
260 pub fn without_first_quote(self) -> Ident {
261 Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span)
264 /// "Normalize" ident for use in comparisons using "item hygiene".
265 /// Identifiers with same string value become same if they came from the same "modern" macro
266 /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
267 /// different "modern" macros.
268 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
269 pub fn modern(self) -> Ident {
270 Ident::new(self.name, self.span.modern())
273 /// "Normalize" ident for use in comparisons using "local variable hygiene".
274 /// Identifiers with same string value become same if they came from the same non-transparent
275 /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
276 /// non-transparent macros.
277 /// Technically, this operation strips all transparent marks from ident's syntactic context.
278 pub fn modern_and_legacy(self) -> Ident {
279 Ident::new(self.name, self.span.modern_and_legacy())
282 pub fn gensym(self) -> Ident {
283 Ident::new(self.name.gensymed(), self.span)
286 pub fn gensym_if_underscore(self) -> Ident {
287 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
290 pub fn as_str(self) -> LocalInternedString {
294 pub fn as_interned_str(self) -> InternedString {
295 self.name.as_interned_str()
299 impl PartialEq for Ident {
300 fn eq(&self, rhs: &Self) -> bool {
301 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
305 impl Hash for Ident {
306 fn hash<H: Hasher>(&self, state: &mut H) {
307 self.name.hash(state);
308 self.span.ctxt().hash(state);
312 impl fmt::Debug for Ident {
313 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
314 write!(f, "{}{:?}", self.name, self.span.ctxt())
318 impl fmt::Display for Ident {
319 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
320 fmt::Display::fmt(&self.name, f)
324 impl Encodable for Ident {
325 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
326 if self.span.ctxt().modern() == SyntaxContext::empty() {
327 s.emit_str(&self.as_str())
328 } else { // FIXME(jseyfried): intercrate hygiene
329 let mut string = "#".to_owned();
330 string.push_str(&self.as_str());
336 impl Decodable for Ident {
337 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
338 let string = d.read_str()?;
339 Ok(if !string.starts_with('#') {
340 Ident::from_str(&string)
341 } else { // FIXME(jseyfried): intercrate hygiene
342 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
347 /// A symbol is an interned or gensymed string. The use of `newtype_index!` means
348 /// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
349 /// the last 256 values for tagging purposes.
351 /// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
352 /// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
353 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
354 pub struct Symbol(SymbolIndex);
357 pub struct SymbolIndex { .. }
361 const fn new(n: u32) -> Self {
362 Symbol(SymbolIndex::from_u32_const(n))
365 /// Maps a string to its interned representation.
366 pub fn intern(string: &str) -> Self {
367 with_interner(|interner| interner.intern(string))
370 pub fn interned(self) -> Self {
371 with_interner(|interner| interner.interned(self))
374 /// Gensyms a new `Symbol` for `string`, using the current interner.
375 pub fn gensym(string: &str) -> Self {
376 with_interner(|interner| interner.gensym(string))
379 pub fn gensymed(self) -> Self {
380 with_interner(|interner| interner.gensymed(self))
383 pub fn is_gensymed(self) -> bool {
384 with_interner(|interner| interner.is_gensymed(self))
387 pub fn as_str(self) -> LocalInternedString {
388 with_interner(|interner| unsafe {
389 LocalInternedString {
390 string: std::mem::transmute::<&str, &str>(interner.get(self))
395 pub fn as_interned_str(self) -> InternedString {
396 with_interner(|interner| InternedString {
397 symbol: interner.interned(self)
401 pub fn as_u32(self) -> u32 {
406 impl fmt::Debug for Symbol {
407 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
408 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
410 write!(f, "{}({:?})", self, self.0)
412 write!(f, "{}", self)
417 impl fmt::Display for Symbol {
418 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
419 fmt::Display::fmt(&self.as_str(), f)
423 impl Encodable for Symbol {
424 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
425 s.emit_str(&self.as_str())
429 impl Decodable for Symbol {
430 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
431 Ok(Symbol::intern(&d.read_str()?))
435 impl<T: std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
436 fn eq(&self, other: &T) -> bool {
437 self.as_str() == other.deref()
441 // The `&'static str`s in this type actually point into the arena.
443 // Note that normal symbols are indexed upward from 0, and gensyms are indexed
444 // downward from SymbolIndex::MAX_AS_U32.
446 pub struct Interner {
447 arena: DroplessArena,
448 names: FxHashMap<&'static str, Symbol>,
449 strings: Vec<&'static str>,
450 gensyms: Vec<Symbol>,
454 fn prefill(init: &[&str]) -> Self {
455 let mut this = Interner::default();
456 for &string in init {
458 // We can't allocate empty strings in the arena, so handle this here.
459 let name = Symbol::new(this.strings.len() as u32);
460 this.names.insert("", name);
461 this.strings.push("");
469 pub fn intern(&mut self, string: &str) -> Symbol {
470 if let Some(&name) = self.names.get(string) {
474 let name = Symbol::new(self.strings.len() as u32);
476 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
478 let string: &str = unsafe {
479 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
481 // It is safe to extend the arena allocation to `'static` because we only access
482 // these while the arena is still alive.
483 let string: &'static str = unsafe {
484 &*(string as *const str)
486 self.strings.push(string);
487 self.names.insert(string, name);
491 pub fn interned(&self, symbol: Symbol) -> Symbol {
492 if (symbol.0.as_usize()) < self.strings.len() {
495 self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]
499 fn gensym(&mut self, string: &str) -> Symbol {
500 let symbol = self.intern(string);
501 self.gensymed(symbol)
504 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
505 self.gensyms.push(symbol);
506 Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
509 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
510 symbol.0.as_usize() >= self.strings.len()
513 pub fn get(&self, symbol: Symbol) -> &str {
514 match self.strings.get(symbol.0.as_usize()) {
515 Some(string) => string,
517 let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize];
518 self.strings[symbol.0.as_usize()]
525 use super::{Symbol, Ident};
527 #[derive(Clone, Copy, PartialEq, Eq)]
534 pub fn ident(self) -> Ident {
539 pub fn name(self) -> Symbol {
553 fn is_used_keyword_2018(self) -> bool {
554 self == keywords::Dyn.name()
557 fn is_unused_keyword_2018(self) -> bool {
558 self >= keywords::Async.name() && self <= keywords::Try.name()
563 // Returns `true` for reserved identifiers used internally for elided lifetimes,
564 // unnamed method parameters, crate root module, error recovery etc.
565 pub fn is_special(self) -> bool {
566 self.name <= keywords::Underscore.name()
569 /// Returns `true` if the token is a keyword used in the language.
570 pub fn is_used_keyword(self) -> bool {
571 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
572 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
573 self.name.is_used_keyword_2018() && self.span.rust_2018()
576 /// Returns `true` if the token is a keyword reserved for possible future use.
577 pub fn is_unused_keyword(self) -> bool {
578 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
579 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
580 self.name.is_unused_keyword_2018() && self.span.rust_2018()
583 /// Returns `true` if the token is either a special identifier or a keyword.
584 pub fn is_reserved(self) -> bool {
585 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
588 /// A keyword or reserved identifier that can be used as a path segment.
589 pub fn is_path_segment_keyword(self) -> bool {
590 self.name == keywords::Super.name() ||
591 self.name == keywords::SelfLower.name() ||
592 self.name == keywords::SelfUpper.name() ||
593 self.name == keywords::Crate.name() ||
594 self.name == keywords::PathRoot.name() ||
595 self.name == keywords::DollarCrate.name()
598 /// This identifier can be a raw identifier.
599 pub fn can_be_raw(self) -> bool {
600 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
601 !self.is_path_segment_keyword()
604 /// We see this identifier in a normal identifier position, like a variable name or a type.
605 /// How was it written originally? Did it use the raw form? Let's try to guess.
606 pub fn is_raw_guess(self) -> bool {
607 self.can_be_raw() && self.is_reserved()
611 // Locks the symbol interner stored in `GLOBALS` and runs `f` with mutable access to it.
613 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
614 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
617 /// Represents a string stored in the interner. Because the interner outlives any thread
618 /// which uses this type, we can safely treat `string`, which points to interner data,
619 /// as an immortal string, as long as this type never crosses between threads.
620 // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
621 // by creating a new thread right after constructing the interner.
622 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
623 pub struct LocalInternedString {
624 string: &'static str,
627 impl LocalInternedString {
628 pub fn as_interned_str(self) -> InternedString {
630 symbol: Symbol::intern(self.string)
634 pub fn get(&self) -> &str {
635 // This returns a valid string since we ensure that the interner outlives `self`
636 // by creating the interner on a thread which outlives threads which can access it.
637 // This type cannot move to a thread which outlives the interner since it does
638 // not implement Send.
643 impl<U: ?Sized> std::convert::AsRef<U> for LocalInternedString
645 str: std::convert::AsRef<U>
647 fn as_ref(&self) -> &U {
652 impl<T: std::ops::Deref<Target = str>> std::cmp::PartialEq<T> for LocalInternedString {
653 fn eq(&self, other: &T) -> bool {
654 self.string == other.deref()
658 impl std::cmp::PartialEq<LocalInternedString> for str {
659 fn eq(&self, other: &LocalInternedString) -> bool {
664 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a str {
665 fn eq(&self, other: &LocalInternedString) -> bool {
666 *self == other.string
670 impl std::cmp::PartialEq<LocalInternedString> for String {
671 fn eq(&self, other: &LocalInternedString) -> bool {
676 impl<'a> std::cmp::PartialEq<LocalInternedString> for &'a String {
677 fn eq(&self, other: &LocalInternedString) -> bool {
678 *self == other.string
682 impl !Send for LocalInternedString {}
683 impl !Sync for LocalInternedString {}
685 impl std::ops::Deref for LocalInternedString {
687 fn deref(&self) -> &str { self.string }
690 impl fmt::Debug for LocalInternedString {
691 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
692 fmt::Debug::fmt(self.string, f)
696 impl fmt::Display for LocalInternedString {
697 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
698 fmt::Display::fmt(self.string, f)
702 impl Decodable for LocalInternedString {
703 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
704 Ok(Symbol::intern(&d.read_str()?).as_str())
708 impl Encodable for LocalInternedString {
709 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
710 s.emit_str(self.string)
714 /// Represents a string stored in the string interner.
715 #[derive(Clone, Copy, Eq)]
716 pub struct InternedString {
720 impl InternedString {
721 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
722 let str = with_interner(|interner| {
723 interner.get(self.symbol) as *const str
725 // This is safe because the interner keeps the string alive until it is dropped.
726 // We can access it because we know the interner is still alive since we use a
727 // scoped thread local to access it, and it was alive at the beginning of this scope
731 fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
732 let (self_str, other_str) = with_interner(|interner| {
733 (interner.get(self.symbol) as *const str,
734 interner.get(other.symbol) as *const str)
736 // This is safe for the same reason that `with` is safe.
737 unsafe { f(&*self_str, &*other_str) }
740 pub fn as_symbol(self) -> Symbol {
744 pub fn as_str(self) -> LocalInternedString {
749 impl Hash for InternedString {
750 fn hash<H: Hasher>(&self, state: &mut H) {
751 self.with(|str| str.hash(state))
755 impl PartialOrd<InternedString> for InternedString {
756 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
757 if self.symbol == other.symbol {
758 return Some(Ordering::Equal);
760 self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str))
764 impl Ord for InternedString {
765 fn cmp(&self, other: &InternedString) -> Ordering {
766 if self.symbol == other.symbol {
767 return Ordering::Equal;
769 self.with2(other, |self_str, other_str| self_str.cmp(other_str))
773 impl<T: std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
774 fn eq(&self, other: &T) -> bool {
775 self.with(|string| string == other.deref())
779 impl PartialEq<InternedString> for InternedString {
780 fn eq(&self, other: &InternedString) -> bool {
781 self.symbol == other.symbol
785 impl PartialEq<InternedString> for str {
786 fn eq(&self, other: &InternedString) -> bool {
787 other.with(|string| self == string)
791 impl<'a> PartialEq<InternedString> for &'a str {
792 fn eq(&self, other: &InternedString) -> bool {
793 other.with(|string| *self == string)
797 impl PartialEq<InternedString> for String {
798 fn eq(&self, other: &InternedString) -> bool {
799 other.with(|string| self == string)
803 impl<'a> PartialEq<InternedString> for &'a String {
804 fn eq(&self, other: &InternedString) -> bool {
805 other.with(|string| *self == string)
809 impl std::convert::From<InternedString> for String {
810 fn from(val: InternedString) -> String {
811 val.as_symbol().to_string()
815 impl fmt::Debug for InternedString {
816 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
817 self.with(|str| fmt::Debug::fmt(&str, f))
821 impl fmt::Display for InternedString {
822 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
823 self.with(|str| fmt::Display::fmt(&str, f))
827 impl Decodable for InternedString {
828 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
829 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
833 impl Encodable for InternedString {
834 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
835 self.with(|string| s.emit_str(string))
845 fn interner_tests() {
846 let mut i: Interner = Interner::default();
847 // first one is zero:
848 assert_eq!(i.intern("dog"), Symbol::new(0));
849 // re-use gets the same entry:
850 assert_eq!(i.intern("dog"), Symbol::new(0));
851 // different string gets a different #:
852 assert_eq!(i.intern("cat"), Symbol::new(1));
853 assert_eq!(i.intern("cat"), Symbol::new(1));
854 // dog is still at zero
855 assert_eq!(i.intern("dog"), Symbol::new(0));
856 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
857 // gensym of same string gets new number:
858 assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
859 // gensym of *existing* string gets new number:
860 assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
864 fn without_first_quote_test() {
865 GLOBALS.set(&Globals::new(), || {
866 let i = Ident::from_str("'break");
867 assert_eq!(i.without_first_quote().name, keywords::Break.name());