1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find its
13 //! tag, and vice versa.
16 use hygiene::SyntaxContext;
17 use {Span, DUMMY_SP, GLOBALS};
19 use rustc_data_structures::fx::FxHashMap;
20 use arena::DroplessArena;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
24 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
25 use std::hash::{Hash, Hasher};
27 #[derive(Copy, Clone, Eq)]
35 pub const fn new(name: Symbol, span: Span) -> Ident {
39 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
40 Ident::new(name, DUMMY_SP)
43 /// Maps an interned string to an identifier with an empty syntax context.
44 pub fn from_interned_str(string: InternedString) -> Ident {
45 Ident::with_empty_ctxt(string.as_symbol())
48 /// Maps a string to an identifier with an empty syntax context.
49 pub fn from_str(string: &str) -> Ident {
50 Ident::with_empty_ctxt(Symbol::intern(string))
53 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
54 pub fn with_span_pos(self, span: Span) -> Ident {
55 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
58 pub fn without_first_quote(self) -> Ident {
59 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
62 /// "Normalize" ident for use in comparisons using "item hygiene".
63 /// Identifiers with same string value become same if they came from the same "modern" macro
64 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
65 /// different "modern" macros.
66 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
67 pub fn modern(self) -> Ident {
68 Ident::new(self.name, self.span.modern())
71 /// "Normalize" ident for use in comparisons using "local variable hygiene".
72 /// Identifiers with same string value become same if they came from the same non-transparent
73 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
74 /// non-transparent macros.
75 /// Technically, this operation strips all transparent marks from ident's syntactic context.
76 pub fn modern_and_legacy(self) -> Ident {
77 Ident::new(self.name, self.span.modern_and_legacy())
80 pub fn gensym(self) -> Ident {
81 Ident::new(self.name.gensymed(), self.span)
84 pub fn as_str(self) -> LocalInternedString {
88 pub fn as_interned_str(self) -> InternedString {
89 self.name.as_interned_str()
93 impl PartialEq for Ident {
94 fn eq(&self, rhs: &Self) -> bool {
95 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
100 fn hash<H: Hasher>(&self, state: &mut H) {
101 self.name.hash(state);
102 self.span.ctxt().hash(state);
106 impl fmt::Debug for Ident {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 write!(f, "{}{:?}", self.name, self.span.ctxt())
112 impl fmt::Display for Ident {
113 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114 fmt::Display::fmt(&self.name, f)
118 impl Encodable for Ident {
119 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
120 if self.span.ctxt().modern() == SyntaxContext::empty() {
121 s.emit_str(&self.as_str())
122 } else { // FIXME(jseyfried) intercrate hygiene
123 let mut string = "#".to_owned();
124 string.push_str(&self.as_str());
130 impl Decodable for Ident {
131 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
132 let string = d.read_str()?;
133 Ok(if !string.starts_with('#') {
134 Ident::from_str(&string)
135 } else { // FIXME(jseyfried) intercrate hygiene
136 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
141 /// A symbol is an interned or gensymed string.
142 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
143 pub struct Symbol(u32);
145 // The interner is pointed to by a thread local value which is only set on the main thread
146 // with parallelization is disabled. So we don't allow Symbol to transfer between threads
147 // to avoid panics and other errors, even though it would be memory safe to do so.
148 #[cfg(not(parallel_queries))]
149 impl !Send for Symbol { }
150 #[cfg(not(parallel_queries))]
151 impl !Sync for Symbol { }
154 /// Maps a string to its interned representation.
155 pub fn intern(string: &str) -> Self {
156 with_interner(|interner| interner.intern(string))
159 pub fn interned(self) -> Self {
160 with_interner(|interner| interner.interned(self))
163 /// gensym's a new usize, using the current interner.
164 pub fn gensym(string: &str) -> Self {
165 with_interner(|interner| interner.gensym(string))
168 pub fn gensymed(self) -> Self {
169 with_interner(|interner| interner.gensymed(self))
172 pub fn as_str(self) -> LocalInternedString {
173 with_interner(|interner| unsafe {
174 LocalInternedString {
175 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
180 pub fn as_interned_str(self) -> InternedString {
181 with_interner(|interner| InternedString {
182 symbol: interner.interned(self)
186 pub fn as_u32(self) -> u32 {
191 impl fmt::Debug for Symbol {
192 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
195 write!(f, "{}({})", self, self.0)
197 write!(f, "{}", self)
202 impl fmt::Display for Symbol {
203 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
204 fmt::Display::fmt(&self.as_str(), f)
208 impl Encodable for Symbol {
209 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
210 s.emit_str(&self.as_str())
214 impl Decodable for Symbol {
215 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
216 Ok(Symbol::intern(&d.read_str()?))
220 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
221 fn eq(&self, other: &T) -> bool {
222 self.as_str() == other.deref()
226 // The &'static strs in this type actually point into the arena
228 pub struct Interner {
229 arena: DroplessArena,
230 names: FxHashMap<&'static str, Symbol>,
231 strings: Vec<&'static str>,
232 gensyms: Vec<Symbol>,
236 fn prefill(init: &[&str]) -> Self {
237 let mut this = Interner::default();
238 for &string in init {
240 // We can't allocate empty strings in the arena, so handle this here
241 let name = Symbol(this.strings.len() as u32);
242 this.names.insert("", name);
243 this.strings.push("");
251 pub fn intern(&mut self, string: &str) -> Symbol {
252 if let Some(&name) = self.names.get(string) {
256 let name = Symbol(self.strings.len() as u32);
258 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
259 let string: &str = unsafe {
260 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
262 // It is safe to extend the arena allocation to 'static because we only access
263 // these while the arena is still alive
264 let string: &'static str = unsafe {
265 &*(string as *const str)
267 self.strings.push(string);
268 self.names.insert(string, name);
272 pub fn interned(&self, symbol: Symbol) -> Symbol {
273 if (symbol.0 as usize) < self.strings.len() {
276 self.interned(self.gensyms[(!0 - symbol.0) as usize])
280 fn gensym(&mut self, string: &str) -> Symbol {
281 let symbol = self.intern(string);
282 self.gensymed(symbol)
285 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
286 self.gensyms.push(symbol);
287 Symbol(!0 - self.gensyms.len() as u32 + 1)
290 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
291 symbol.0 as usize >= self.strings.len()
294 pub fn get(&self, symbol: Symbol) -> &str {
295 match self.strings.get(symbol.0 as usize) {
296 Some(string) => string,
297 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
302 // In this macro, there is the requirement that the name (the number) must be monotonically
303 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
304 // except starting from the next number instead of zero.
305 macro_rules! declare_keywords {(
306 $( ($index: expr, $konst: ident, $string: expr) )*
309 use super::{Symbol, Ident};
310 #[derive(Clone, Copy, PartialEq, Eq)]
315 #[inline] pub fn ident(self) -> Ident { self.ident }
316 #[inline] pub fn name(self) -> Symbol { self.ident.name }
319 #[allow(non_upper_case_globals)]
320 pub const $konst: Keyword = Keyword {
321 ident: Ident::with_empty_ctxt(super::Symbol($index))
325 impl ::std::str::FromStr for Keyword {
328 fn from_str(s: &str) -> Result<Self, ()> {
330 $($string => Ok($konst),)*
338 pub fn fresh() -> Self {
339 Interner::prefill(&[$($string,)*])
344 // NB: leaving holes in the ident table is bad! a different ident will get
345 // interned with the id from the hole, but it will be between the min and max
346 // of the reserved words, and thus tagged as "reserved".
347 // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
348 // this should rarely be necessary, though, if the keywords are kept in alphabetical order.
350 // Special reserved identifiers used internally for elided lifetimes,
351 // unnamed method parameters, crate root module, error recovery etc.
353 (1, CrateRoot, "{{root}}")
354 (2, DollarCrate, "$crate")
357 // Keywords used in the language.
362 (8, Continue, "continue")
366 (12, Extern, "extern")
381 (27, Return, "return")
382 (28, SelfValue, "self")
383 (29, SelfType, "Self")
384 (30, Static, "static")
385 (31, Struct, "struct")
390 (36, Unsafe, "unsafe")
395 // Keywords reserved for future use.
396 (40, Abstract, "abstract")
397 (41, Become, "become")
401 (45, Override, "override")
403 (47, Typeof, "typeof")
404 (48, Unsized, "unsized")
405 (49, Virtual, "virtual")
408 // Edition-specific keywords reserved for future use.
409 (51, Async, "async") // >= 2018 Edition only
410 (52, Dyn, "dyn") // >= 2018 Edition only
411 (53, Try, "try") // >= 2018 Edition only
413 // Special lifetime names
414 (54, UnderscoreLifetime, "'_")
415 (55, StaticLifetime, "'static")
417 // Weak keywords, have special meaning only in specific contexts.
420 (58, Default, "default")
422 (60, Existential, "existential")
426 fn is_unused_keyword_2018(self) -> bool {
427 self >= keywords::Async.name() && self <= keywords::Try.name()
432 // Returns true for reserved identifiers used internally for elided lifetimes,
433 // unnamed method parameters, crate root module, error recovery etc.
434 pub fn is_special(self) -> bool {
435 self.name <= keywords::Underscore.name()
438 /// Returns `true` if the token is a keyword used in the language.
439 pub fn is_used_keyword(self) -> bool {
440 self.name >= keywords::As.name() && self.name <= keywords::While.name()
443 /// Returns `true` if the token is a keyword reserved for possible future use.
444 pub fn is_unused_keyword(self) -> bool {
445 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
446 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
447 self.name.is_unused_keyword_2018() && self.span.edition() == Edition::Edition2018
450 /// Returns `true` if the token is either a special identifier or a keyword.
451 pub fn is_reserved(self) -> bool {
452 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
455 /// A keyword or reserved identifier that can be used as a path segment.
456 pub fn is_path_segment_keyword(self) -> bool {
457 self.name == keywords::Super.name() ||
458 self.name == keywords::SelfValue.name() ||
459 self.name == keywords::SelfType.name() ||
460 self.name == keywords::Extern.name() ||
461 self.name == keywords::Crate.name() ||
462 self.name == keywords::CrateRoot.name() ||
463 self.name == keywords::DollarCrate.name()
466 // We see this identifier in a normal identifier position, like variable name or a type.
467 // How was it written originally? Did it use the raw form? Let's try to guess.
468 pub fn is_raw_guess(self) -> bool {
469 self.name != keywords::Invalid.name() &&
470 self.is_reserved() && !self.is_path_segment_keyword()
474 // If an interner exists, return it. Otherwise, prepare a fresh one.
476 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
477 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
480 /// Represents a string stored in the interner. Because the interner outlives any thread
481 /// which uses this type, we can safely treat `string` which points to interner data,
482 /// as an immortal string, as long as this type never crosses between threads.
483 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
484 // by creating a new thread right after constructing the interner
485 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
486 pub struct LocalInternedString {
487 string: &'static str,
490 impl LocalInternedString {
491 pub fn as_interned_str(self) -> InternedString {
493 symbol: Symbol::intern(self.string)
497 pub fn get(&self) -> &'static str {
502 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
504 str: ::std::convert::AsRef<U>
506 fn as_ref(&self) -> &U {
511 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
512 fn eq(&self, other: &T) -> bool {
513 self.string == other.deref()
517 impl ::std::cmp::PartialEq<LocalInternedString> for str {
518 fn eq(&self, other: &LocalInternedString) -> bool {
523 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
524 fn eq(&self, other: &LocalInternedString) -> bool {
525 *self == other.string
529 impl ::std::cmp::PartialEq<LocalInternedString> for String {
530 fn eq(&self, other: &LocalInternedString) -> bool {
535 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
536 fn eq(&self, other: &LocalInternedString) -> bool {
537 *self == other.string
541 impl !Send for LocalInternedString {}
542 impl !Sync for LocalInternedString {}
544 impl ::std::ops::Deref for LocalInternedString {
546 fn deref(&self) -> &str { self.string }
549 impl fmt::Debug for LocalInternedString {
550 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
551 fmt::Debug::fmt(self.string, f)
555 impl fmt::Display for LocalInternedString {
556 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
557 fmt::Display::fmt(self.string, f)
561 impl Decodable for LocalInternedString {
562 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
563 Ok(Symbol::intern(&d.read_str()?).as_str())
567 impl Encodable for LocalInternedString {
568 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
569 s.emit_str(self.string)
573 /// Represents a string stored in the string interner
574 #[derive(Clone, Copy, Eq)]
575 pub struct InternedString {
579 impl InternedString {
580 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
581 let str = with_interner(|interner| {
582 interner.get(self.symbol) as *const str
584 // This is safe because the interner keeps string alive until it is dropped.
585 // We can access it because we know the interner is still alive since we use a
586 // scoped thread local to access it, and it was alive at the beginning of this scope
590 pub fn as_symbol(self) -> Symbol {
594 pub fn as_str(self) -> LocalInternedString {
599 impl Hash for InternedString {
600 fn hash<H: Hasher>(&self, state: &mut H) {
601 self.with(|str| str.hash(state))
605 impl PartialOrd<InternedString> for InternedString {
606 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
607 if self.symbol == other.symbol {
608 return Some(Ordering::Equal);
610 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
614 impl Ord for InternedString {
615 fn cmp(&self, other: &InternedString) -> Ordering {
616 if self.symbol == other.symbol {
617 return Ordering::Equal;
619 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
623 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
624 fn eq(&self, other: &T) -> bool {
625 self.with(|string| string == other.deref())
629 impl PartialEq<InternedString> for InternedString {
630 fn eq(&self, other: &InternedString) -> bool {
631 self.symbol == other.symbol
635 impl PartialEq<InternedString> for str {
636 fn eq(&self, other: &InternedString) -> bool {
637 other.with(|string| self == string)
641 impl<'a> PartialEq<InternedString> for &'a str {
642 fn eq(&self, other: &InternedString) -> bool {
643 other.with(|string| *self == string)
647 impl PartialEq<InternedString> for String {
648 fn eq(&self, other: &InternedString) -> bool {
649 other.with(|string| self == string)
653 impl<'a> PartialEq<InternedString> for &'a String {
654 fn eq(&self, other: &InternedString) -> bool {
655 other.with(|string| *self == string)
659 impl ::std::convert::From<InternedString> for String {
660 fn from(val: InternedString) -> String {
661 val.as_symbol().to_string()
665 impl fmt::Debug for InternedString {
666 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
667 self.with(|str| fmt::Debug::fmt(&str, f))
671 impl fmt::Display for InternedString {
672 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
673 self.with(|str| fmt::Display::fmt(&str, f))
677 impl Decodable for InternedString {
678 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
679 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
683 impl Encodable for InternedString {
684 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
685 self.with(|string| s.emit_str(string))
695 fn interner_tests() {
696 let mut i: Interner = Interner::default();
697 // first one is zero:
698 assert_eq!(i.intern("dog"), Symbol(0));
699 // re-use gets the same entry:
700 assert_eq!(i.intern("dog"), Symbol(0));
701 // different string gets a different #:
702 assert_eq!(i.intern("cat"), Symbol(1));
703 assert_eq!(i.intern("cat"), Symbol(1));
704 // dog is still at zero
705 assert_eq!(i.intern("dog"), Symbol(0));
706 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
707 // gensym of same string gets new number :
708 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
709 // gensym of *existing* string gets new number:
710 assert_eq!(i.gensym("dog"), Symbol(4294967293));
714 fn without_first_quote_test() {
715 GLOBALS.set(&Globals::new(), || {
716 let i = Ident::from_str("'break");
717 assert_eq!(i.without_first_quote().name, keywords::Break.name());