1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e., given a value, one can easily find the
13 //! tag, and vice versa.
15 use arena::DroplessArena;
16 use rustc_data_structures::fx::FxHashMap;
17 use serialize::{Decodable, Decoder, Encodable, Encoder};
21 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
22 use std::hash::{Hash, Hasher};
24 use hygiene::SyntaxContext;
25 use {Span, DUMMY_SP, GLOBALS};
27 #[derive(Copy, Clone, Eq)]
35 pub const fn new(name: Symbol, span: Span) -> Ident {
40 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
41 Ident::new(name, DUMMY_SP)
44 /// Maps an interned string to an identifier with an empty syntax context.
45 pub fn from_interned_str(string: InternedString) -> Ident {
46 Ident::with_empty_ctxt(string.as_symbol())
49 /// Maps a string to an identifier with an empty syntax context.
50 pub fn from_str(string: &str) -> Ident {
51 Ident::with_empty_ctxt(Symbol::intern(string))
54 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
55 pub fn with_span_pos(self, span: Span) -> Ident {
56 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
59 pub fn without_first_quote(self) -> Ident {
60 Ident::new(Symbol::intern(self.as_str().trim_start_matches('\'')), self.span)
63 /// "Normalize" ident for use in comparisons using "item hygiene".
64 /// Identifiers with the same string value become the same if they came from the same "modern" macro
65 /// (e.g., `macro` item, but not `macro_rules` item) and stay different if they came from
66 /// different "modern" macros.
67 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
68 pub fn modern(self) -> Ident {
69 Ident::new(self.name, self.span.modern())
72 /// "Normalize" ident for use in comparisons using "local variable hygiene".
73 /// Identifiers with the same string value become the same if they came from the same non-transparent
74 /// macro (e.g., `macro` or `macro_rules!` items) and stay different if they came from different
75 /// non-transparent macros.
76 /// Technically, this operation strips all transparent marks from ident's syntactic context.
77 pub fn modern_and_legacy(self) -> Ident {
78 Ident::new(self.name, self.span.modern_and_legacy())
81 pub fn gensym(self) -> Ident {
82 Ident::new(self.name.gensymed(), self.span)
85 pub fn gensym_if_underscore(self) -> Ident {
86 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
89 pub fn as_str(self) -> LocalInternedString {
93 pub fn as_interned_str(self) -> InternedString {
94 self.name.as_interned_str()
98 impl PartialEq for Ident {
99 fn eq(&self, rhs: &Self) -> bool {
100 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
104 impl Hash for Ident {
105 fn hash<H: Hasher>(&self, state: &mut H) {
106 self.name.hash(state);
107 self.span.ctxt().hash(state);
111 impl fmt::Debug for Ident {
112 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113 write!(f, "{}{:?}", self.name, self.span.ctxt())
117 impl fmt::Display for Ident {
118 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119 fmt::Display::fmt(&self.name, f)
123 impl Encodable for Ident {
124 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
125 if self.span.ctxt().modern() == SyntaxContext::empty() {
126 s.emit_str(&self.as_str())
127 } else { // FIXME(jseyfried): intercrate hygiene
128 let mut string = "#".to_owned();
129 string.push_str(&self.as_str());
135 impl Decodable for Ident {
136 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
137 let string = d.read_str()?;
138 Ok(if !string.starts_with('#') {
139 Ident::from_str(&string)
140 } else { // FIXME(jseyfried): intercrate hygiene
141 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
146 /// A symbol is an interned or gensymed string.
///
/// It is stored as a `u32` index that the `Interner` resolves back to a string.
/// The derived equality, ordering and hashing all operate on that index, not on
/// the contents of the string it stands for.
147 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
148 pub struct Symbol(u32);
150 // The interner is pointed to by a thread local value which is only set on the main thread
151 // when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
152 // to avoid panics and other errors, even though it would be memory safe to do so.
153 #[cfg(not(parallel_queries))]
154 impl !Send for Symbol { }
155 #[cfg(not(parallel_queries))]
156 impl !Sync for Symbol { }
159 /// Maps a string to its interned representation.
160 pub fn intern(string: &str) -> Self {
161 with_interner(|interner| interner.intern(string))
164 pub fn interned(self) -> Self {
165 with_interner(|interner| interner.interned(self))
168 /// Gensyms a new symbol for `string`, using the current interner.
169 pub fn gensym(string: &str) -> Self {
170 with_interner(|interner| interner.gensym(string))
173 pub fn gensymed(self) -> Self {
174 with_interner(|interner| interner.gensymed(self))
177 pub fn as_str(self) -> LocalInternedString {
178 with_interner(|interner| unsafe {
179 LocalInternedString {
180 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
185 pub fn as_interned_str(self) -> InternedString {
186 with_interner(|interner| InternedString {
187 symbol: interner.interned(self)
191 pub fn as_u32(self) -> u32 {
196 impl fmt::Debug for Symbol {
197 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
198 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
200 write!(f, "{}({})", self, self.0)
202 write!(f, "{}", self)
207 impl fmt::Display for Symbol {
208 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209 fmt::Display::fmt(&self.as_str(), f)
213 impl Encodable for Symbol {
214 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
215 s.emit_str(&self.as_str())
219 impl Decodable for Symbol {
220 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
221 Ok(Symbol::intern(&d.read_str()?))
225 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
226 fn eq(&self, other: &T) -> bool {
227 self.as_str() == other.deref()
231 // The `&'static str`s in this type actually point into the arena.
233 pub struct Interner {
234 arena: DroplessArena,
235 names: FxHashMap<&'static str, Symbol>,
236 strings: Vec<&'static str>,
237 gensyms: Vec<Symbol>,
241 fn prefill(init: &[&str]) -> Self {
242 let mut this = Interner::default();
243 for &string in init {
245 // We can't allocate empty strings in the arena, so handle this here.
246 let name = Symbol(this.strings.len() as u32);
247 this.names.insert("", name);
248 this.strings.push("");
256 pub fn intern(&mut self, string: &str) -> Symbol {
257 if let Some(&name) = self.names.get(string) {
261 let name = Symbol(self.strings.len() as u32);
263 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
265 let string: &str = unsafe {
266 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
268 // It is safe to extend the arena allocation to `'static` because we only access
269 // these while the arena is still alive.
270 let string: &'static str = unsafe {
271 &*(string as *const str)
273 self.strings.push(string);
274 self.names.insert(string, name);
278 pub fn interned(&self, symbol: Symbol) -> Symbol {
279 if (symbol.0 as usize) < self.strings.len() {
282 self.interned(self.gensyms[(!0 - symbol.0) as usize])
286 fn gensym(&mut self, string: &str) -> Symbol {
287 let symbol = self.intern(string);
288 self.gensymed(symbol)
291 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
292 self.gensyms.push(symbol);
293 Symbol(!0 - self.gensyms.len() as u32 + 1)
296 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
297 symbol.0 as usize >= self.strings.len()
300 pub fn get(&self, symbol: Symbol) -> &str {
301 match self.strings.get(symbol.0 as usize) {
302 Some(string) => string,
303 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
308 // In this macro, there is the requirement that the name (the number) must be monotonically
309 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
310 // except starting from the next number instead of zero.
311 macro_rules! declare_keywords {(
312 $( ($index: expr, $konst: ident, $string: expr) )*
315 use super::{Symbol, Ident};
316 #[derive(Clone, Copy, PartialEq, Eq)]
/// Returns the wrapped `Ident`.
321 #[inline] pub fn ident(self) -> Ident { self.ident }
/// Returns the `Symbol` held in the `name` field of the wrapped `Ident`.
322 #[inline] pub fn name(self) -> Symbol { self.ident.name }
325 #[allow(non_upper_case_globals)]
326 pub const $konst: Keyword = Keyword {
327 ident: Ident::with_empty_ctxt(super::Symbol($index))
331 impl ::std::str::FromStr for Keyword {
334 fn from_str(s: &str) -> Result<Self, ()> {
336 $($string => Ok($konst),)*
344 pub fn fresh() -> Self {
345 Interner::prefill(&[$($string,)*])
350 // N.B., leaving holes in the ident table is bad! A different ident will get
351 // interned with the id from the hole, but it will be between the min and max
352 // of the reserved words, and thus tagged as "reserved".
353 // After modifying this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`;
354 // this should rarely be necessary, though, if the keywords are kept in alphabetic order.
356 // Special reserved identifiers used internally for elided lifetimes,
357 // unnamed method parameters, crate root module, error recovery etc.
359 (1, PathRoot, "{{root}}")
360 (2, DollarCrate, "$crate")
363 // Keywords that are used in stable Rust.
368 (8, Continue, "continue")
372 (12, Extern, "extern")
387 (27, Return, "return")
388 (28, SelfLower, "self")
389 (29, SelfUpper, "Self")
390 (30, Static, "static")
391 (31, Struct, "struct")
396 (36, Unsafe, "unsafe")
401 // Keywords that are used in unstable Rust or reserved for future use.
402 (40, Abstract, "abstract")
403 (41, Become, "become")
407 (45, Override, "override")
409 (47, Typeof, "typeof")
410 (48, Unsized, "unsized")
411 (49, Virtual, "virtual")
414 // Edition-specific keywords that are used in stable Rust.
415 (51, Dyn, "dyn") // >= 2018 Edition only
417 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
418 (52, Async, "async") // >= 2018 Edition only
419 (53, Try, "try") // >= 2018 Edition only
421 // Special lifetime names
422 (54, UnderscoreLifetime, "'_")
423 (55, StaticLifetime, "'static")
425 // Weak keywords, have special meaning only in specific contexts.
428 (58, Default, "default")
429 (59, Existential, "existential")
434 fn is_used_keyword_2018(self) -> bool {
435 self == keywords::Dyn.name()
438 fn is_unused_keyword_2018(self) -> bool {
439 self >= keywords::Async.name() && self <= keywords::Try.name()
444 // Returns `true` for reserved identifiers used internally for elided lifetimes,
445 // unnamed method parameters, crate root module, error recovery etc.
446 pub fn is_special(self) -> bool {
447 self.name <= keywords::Underscore.name()
450 /// Returns `true` if the token is a keyword used in the language.
451 pub fn is_used_keyword(self) -> bool {
452 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
453 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
454 self.name.is_used_keyword_2018() && self.span.rust_2018()
457 /// Returns `true` if the token is a keyword reserved for possible future use.
458 pub fn is_unused_keyword(self) -> bool {
459 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
460 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
461 self.name.is_unused_keyword_2018() && self.span.rust_2018()
464 /// Returns `true` if the token is either a special identifier or a keyword.
465 pub fn is_reserved(self) -> bool {
466 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
469 /// A keyword or reserved identifier that can be used as a path segment.
470 pub fn is_path_segment_keyword(self) -> bool {
471 self.name == keywords::Super.name() ||
472 self.name == keywords::SelfLower.name() ||
473 self.name == keywords::SelfUpper.name() ||
474 self.name == keywords::Extern.name() ||
475 self.name == keywords::Crate.name() ||
476 self.name == keywords::PathRoot.name() ||
477 self.name == keywords::DollarCrate.name()
480 // We see this identifier in a normal identifier position, like variable name or a type.
481 // How was it written originally? Did it use the raw form? Let's try to guess.
482 pub fn is_raw_guess(self) -> bool {
483 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
484 self.is_reserved() && !self.is_path_segment_keyword()
488 // Runs `f` with mutable access to the symbol interner stored in `GLOBALS`.
490 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
491 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
494 /// Represents a string stored in the interner. Because the interner outlives any thread
495 /// which uses this type, we can safely treat `string` which points to interner data,
496 /// as an immortal string, as long as this type never crosses between threads.
497 // FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
498 // by creating a new thread right after constructing the interner.
499 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
500 pub struct LocalInternedString {
501 string: &'static str,
504 impl LocalInternedString {
505 pub fn as_interned_str(self) -> InternedString {
507 symbol: Symbol::intern(self.string)
511 pub fn get(&self) -> &'static str {
516 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
518 str: ::std::convert::AsRef<U>
520 fn as_ref(&self) -> &U {
525 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
526 fn eq(&self, other: &T) -> bool {
527 self.string == other.deref()
531 impl ::std::cmp::PartialEq<LocalInternedString> for str {
532 fn eq(&self, other: &LocalInternedString) -> bool {
537 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
538 fn eq(&self, other: &LocalInternedString) -> bool {
539 *self == other.string
543 impl ::std::cmp::PartialEq<LocalInternedString> for String {
544 fn eq(&self, other: &LocalInternedString) -> bool {
549 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
550 fn eq(&self, other: &LocalInternedString) -> bool {
551 *self == other.string
// `LocalInternedString` holds a `&'static str`, and its type-level docs only treat that
// as sound while the value never crosses between threads, so it is explicitly
// neither `Send` nor `Sync`.
555 impl !Send for LocalInternedString {}
556 impl !Sync for LocalInternedString {}
558 impl ::std::ops::Deref for LocalInternedString {
560 fn deref(&self) -> &str { self.string }
563 impl fmt::Debug for LocalInternedString {
564 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
565 fmt::Debug::fmt(self.string, f)
569 impl fmt::Display for LocalInternedString {
570 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
571 fmt::Display::fmt(self.string, f)
575 impl Decodable for LocalInternedString {
576 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
577 Ok(Symbol::intern(&d.read_str()?).as_str())
581 impl Encodable for LocalInternedString {
582 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
583 s.emit_str(self.string)
587 /// Represents a string stored in the string interner.
588 #[derive(Clone, Copy, Eq)]
589 pub struct InternedString {
593 impl InternedString {
594 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
595 let str = with_interner(|interner| {
596 interner.get(self.symbol) as *const str
598 // This is safe because the interner keeps string alive until it is dropped.
599 // We can access it because we know the interner is still alive since we use a
600 // scoped thread local to access it, and it was alive at the beginning of this scope
604 pub fn as_symbol(self) -> Symbol {
608 pub fn as_str(self) -> LocalInternedString {
613 impl Hash for InternedString {
614 fn hash<H: Hasher>(&self, state: &mut H) {
615 self.with(|str| str.hash(state))
619 impl PartialOrd<InternedString> for InternedString {
620 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
621 if self.symbol == other.symbol {
622 return Some(Ordering::Equal);
624 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
628 impl Ord for InternedString {
629 fn cmp(&self, other: &InternedString) -> Ordering {
630 if self.symbol == other.symbol {
631 return Ordering::Equal;
633 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
637 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
638 fn eq(&self, other: &T) -> bool {
639 self.with(|string| string == other.deref())
643 impl PartialEq<InternedString> for InternedString {
644 fn eq(&self, other: &InternedString) -> bool {
645 self.symbol == other.symbol
649 impl PartialEq<InternedString> for str {
650 fn eq(&self, other: &InternedString) -> bool {
651 other.with(|string| self == string)
655 impl<'a> PartialEq<InternedString> for &'a str {
656 fn eq(&self, other: &InternedString) -> bool {
657 other.with(|string| *self == string)
661 impl PartialEq<InternedString> for String {
662 fn eq(&self, other: &InternedString) -> bool {
663 other.with(|string| self == string)
667 impl<'a> PartialEq<InternedString> for &'a String {
668 fn eq(&self, other: &InternedString) -> bool {
669 other.with(|string| *self == string)
673 impl ::std::convert::From<InternedString> for String {
674 fn from(val: InternedString) -> String {
675 val.as_symbol().to_string()
679 impl fmt::Debug for InternedString {
680 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
681 self.with(|str| fmt::Debug::fmt(&str, f))
685 impl fmt::Display for InternedString {
686 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
687 self.with(|str| fmt::Display::fmt(&str, f))
691 impl Decodable for InternedString {
692 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
693 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
697 impl Encodable for InternedString {
698 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
699 self.with(|string| s.emit_str(string))
709 fn interner_tests() {
710 let mut i: Interner = Interner::default();
711 // first one is zero:
712 assert_eq!(i.intern("dog"), Symbol(0));
713 // re-use gets the same entry:
714 assert_eq!(i.intern("dog"), Symbol(0));
715 // different string gets a different #:
716 assert_eq!(i.intern("cat"), Symbol(1));
717 assert_eq!(i.intern("cat"), Symbol(1));
718 // dog is still at zero
719 assert_eq!(i.intern("dog"), Symbol(0));
720 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
721 // gensym of same string gets new number:
722 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
723 // gensym of *existing* string gets new number:
724 assert_eq!(i.gensym("dog"), Symbol(4294967293));
728 fn without_first_quote_test() {
729 GLOBALS.set(&Globals::new(), || {
730 let i = Ident::from_str("'break");
731 assert_eq!(i.without_first_quote().name, keywords::Break.name());