1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with usize tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
16 use hygiene::SyntaxContext;
17 use {Span, DUMMY_SP, GLOBALS};
19 use rustc_data_structures::fx::FxHashMap;
20 use arena::DroplessArena;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
24 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
25 use std::hash::{Hash, Hasher};
27 #[derive(Copy, Clone, Eq)]
35 pub const fn new(name: Symbol, span: Span) -> Ident {
39 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
40 Ident::new(name, DUMMY_SP)
43 /// Maps an interned string to an identifier with an empty syntax context.
44 pub fn from_interned_str(string: InternedString) -> Ident {
45 Ident::with_empty_ctxt(string.as_symbol())
48 /// Maps a string to an identifier with an empty syntax context.
49 pub fn from_str(string: &str) -> Ident {
50 Ident::with_empty_ctxt(Symbol::intern(string))
53 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
54 pub fn with_span_pos(self, span: Span) -> Ident {
55 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
58 pub fn without_first_quote(self) -> Ident {
59 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
62 /// "Normalize" ident for use in comparisons using "item hygiene".
63 /// Identifiers with same string value become same if they came from the same "modern" macro
64 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
65 /// different "modern" macros.
66 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
67 pub fn modern(self) -> Ident {
68 Ident::new(self.name, self.span.modern())
71 /// "Normalize" ident for use in comparisons using "local variable hygiene".
72 /// Identifiers with same string value become same if they came from the same non-transparent
73 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
74 /// non-transparent macros.
75 /// Technically, this operation strips all transparent marks from ident's syntactic context.
76 pub fn modern_and_legacy(self) -> Ident {
77 Ident::new(self.name, self.span.modern_and_legacy())
80 pub fn gensym(self) -> Ident {
81 Ident::new(self.name.gensymed(), self.span)
84 pub fn as_str(self) -> LocalInternedString {
88 pub fn as_interned_str(self) -> InternedString {
89 self.name.as_interned_str()
93 impl PartialEq for Ident {
94 fn eq(&self, rhs: &Self) -> bool {
95 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
100 fn hash<H: Hasher>(&self, state: &mut H) {
101 self.name.hash(state);
102 self.span.ctxt().hash(state);
106 impl fmt::Debug for Ident {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 write!(f, "{}{:?}", self.name, self.span.ctxt())
112 impl fmt::Display for Ident {
113 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114 fmt::Display::fmt(&self.name, f)
118 impl Encodable for Ident {
119 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
120 if self.span.ctxt().modern() == SyntaxContext::empty() {
121 s.emit_str(&self.as_str())
122 } else { // FIXME(jseyfried) intercrate hygiene
123 let mut string = "#".to_owned();
124 string.push_str(&self.as_str());
130 impl Decodable for Ident {
131 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
132 let string = d.read_str()?;
133 Ok(if !string.starts_with('#') {
134 Ident::from_str(&string)
135 } else { // FIXME(jseyfried) intercrate hygiene
136 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
141 /// A symbol is an interned or gensymed string.
142 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
143 pub struct Symbol(u32);
145 // The interner is pointed to by a thread local value which is only set on the main thread
146 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
147 // to avoid panics and other errors, even though it would be memory safe to do so.
148 #[cfg(not(parallel_queries))]
149 impl !Send for Symbol { }
150 #[cfg(not(parallel_queries))]
151 impl !Sync for Symbol { }
154 /// Maps a string to its interned representation.
155 pub fn intern(string: &str) -> Self {
156 with_interner(|interner| interner.intern(string))
159 pub fn interned(self) -> Self {
160 with_interner(|interner| interner.interned(self))
163 /// gensym's a new usize, using the current interner.
164 pub fn gensym(string: &str) -> Self {
165 with_interner(|interner| interner.gensym(string))
168 pub fn gensymed(self) -> Self {
169 with_interner(|interner| interner.gensymed(self))
172 pub fn as_str(self) -> LocalInternedString {
173 with_interner(|interner| unsafe {
174 LocalInternedString {
175 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
180 pub fn as_interned_str(self) -> InternedString {
181 with_interner(|interner| InternedString {
182 symbol: interner.interned(self)
186 pub fn as_u32(self) -> u32 {
191 impl fmt::Debug for Symbol {
192 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
195 write!(f, "{}({})", self, self.0)
197 write!(f, "{}", self)
202 impl fmt::Display for Symbol {
203 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
204 fmt::Display::fmt(&self.as_str(), f)
208 impl Encodable for Symbol {
209 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
210 s.emit_str(&self.as_str())
214 impl Decodable for Symbol {
215 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
216 Ok(Symbol::intern(&d.read_str()?))
220 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
221 fn eq(&self, other: &T) -> bool {
222 self.as_str() == other.deref()
226 // The &'static strs in this type actually point into the arena
227 pub struct Interner {
228 arena: DroplessArena,
229 names: FxHashMap<&'static str, Symbol>,
230 strings: Vec<&'static str>,
231 gensyms: Vec<Symbol>,
235 pub fn new() -> Self {
237 arena: DroplessArena::new(),
238 names: Default::default(),
239 strings: Default::default(),
240 gensyms: Default::default(),
244 fn prefill(init: &[&str]) -> Self {
245 let mut this = Interner::new();
246 for &string in init {
248 // We can't allocate empty strings in the arena, so handle this here
249 let name = Symbol(this.strings.len() as u32);
250 this.names.insert("", name);
251 this.strings.push("");
259 pub fn intern(&mut self, string: &str) -> Symbol {
260 if let Some(&name) = self.names.get(string) {
264 let name = Symbol(self.strings.len() as u32);
266 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
267 let string: &str = unsafe {
268 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
270 // It is safe to extend the arena allocation to 'static because we only access
271 // these while the arena is still alive
272 let string: &'static str = unsafe {
273 &*(string as *const str)
275 self.strings.push(string);
276 self.names.insert(string, name);
280 pub fn interned(&self, symbol: Symbol) -> Symbol {
281 if (symbol.0 as usize) < self.strings.len() {
284 self.interned(self.gensyms[(!0 - symbol.0) as usize])
288 fn gensym(&mut self, string: &str) -> Symbol {
289 let symbol = self.intern(string);
290 self.gensymed(symbol)
293 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
294 self.gensyms.push(symbol);
295 Symbol(!0 - self.gensyms.len() as u32 + 1)
298 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
299 symbol.0 as usize >= self.strings.len()
302 pub fn get(&self, symbol: Symbol) -> &str {
303 match self.strings.get(symbol.0 as usize) {
304 Some(string) => string,
305 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
310 // In this macro, there is the requirement that the name (the number) must be monotonically
311 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
312 // except starting from the next number instead of zero.
313 macro_rules! declare_keywords {(
314 $( ($index: expr, $konst: ident, $string: expr) )*
317 use super::{Symbol, Ident};
318 #[derive(Clone, Copy, PartialEq, Eq)]
323 #[inline] pub fn ident(self) -> Ident { self.ident }
324 #[inline] pub fn name(self) -> Symbol { self.ident.name }
327 #[allow(non_upper_case_globals)]
328 pub const $konst: Keyword = Keyword {
329 ident: Ident::with_empty_ctxt(super::Symbol($index))
333 impl ::std::str::FromStr for Keyword {
336 fn from_str(s: &str) -> Result<Self, ()> {
338 $($string => Ok($konst),)*
346 pub fn fresh() -> Self {
347 Interner::prefill(&[$($string,)*])
352 // NB: leaving holes in the ident table is bad! a different ident will get
353 // interned with the id from the hole, but it will be between the min and max
354 // of the reserved words, and thus tagged as "reserved".
355 // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`,
356 // this should be rarely necessary though if the keywords are kept in alphabetic order.
358 // Special reserved identifiers used internally for elided lifetimes,
359 // unnamed method parameters, crate root module, error recovery etc.
361 (1, CrateRoot, "{{root}}")
362 (2, DollarCrate, "$crate")
365 // Keywords used in the language.
370 (8, Continue, "continue")
374 (12, Extern, "extern")
389 (27, Return, "return")
390 (28, SelfValue, "self")
391 (29, SelfType, "Self")
392 (30, Static, "static")
393 (31, Struct, "struct")
398 (36, Unsafe, "unsafe")
403 // Keywords reserved for future use.
404 (40, Abstract, "abstract")
405 (41, Become, "become")
409 (45, Override, "override")
411 (47, Typeof, "typeof")
412 (48, Unsized, "unsized")
413 (49, Virtual, "virtual")
416 // Edition-specific keywords currently in use.
417 (51, Try, "try") // >= 2018 Edition Only
419 // Edition-specific keywords reserved for future use.
420 (52, Async, "async") // >= 2018 Edition Only
422 // Special lifetime names
423 (53, UnderscoreLifetime, "'_")
424 (54, StaticLifetime, "'static")
426 // Weak keywords, have special meaning only in specific contexts.
429 (57, Default, "default")
432 (60, Existential, "existential")
436 fn is_used_keyword_2018(self) -> bool {
437 self == keywords::Try.name()
440 fn is_unused_keyword_2018(self) -> bool {
441 self == keywords::Async.name()
446 // Returns true for reserved identifiers used internally for elided lifetimes,
447 // unnamed method parameters, crate root module, error recovery etc.
448 pub fn is_special(self) -> bool {
449 self.name <= keywords::Underscore.name()
452 /// Returns `true` if the token is a keyword used in the language.
453 pub fn is_used_keyword(self) -> bool {
454 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
455 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
456 self.name.is_used_keyword_2018() && self.span.edition() == Edition::Edition2018
459 /// Returns `true` if the token is a keyword reserved for possible future use.
460 pub fn is_unused_keyword(self) -> bool {
461 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
462 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
463 self.name.is_unused_keyword_2018() && self.span.edition() == Edition::Edition2018
466 /// Returns `true` if the token is either a special identifier or a keyword.
467 pub fn is_reserved(self) -> bool {
468 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
471 /// A keyword or reserved identifier that can be used as a path segment.
472 pub fn is_path_segment_keyword(self) -> bool {
473 self.name == keywords::Super.name() ||
474 self.name == keywords::SelfValue.name() ||
475 self.name == keywords::SelfType.name() ||
476 self.name == keywords::Extern.name() ||
477 self.name == keywords::Crate.name() ||
478 self.name == keywords::CrateRoot.name() ||
479 self.name == keywords::DollarCrate.name()
482 // We see this identifier in a normal identifier position, like variable name or a type.
483 // How was it written originally? Did it use the raw form? Let's try to guess.
484 pub fn is_raw_guess(self) -> bool {
485 self.name != keywords::Invalid.name() &&
486 self.is_reserved() && !self.is_path_segment_keyword()
490 // If an interner exists, return it. Otherwise, prepare a fresh one.
492 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
493 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
496 /// Represents a string stored in the interner. Because the interner outlives any thread
497 /// which uses this type, we can safely treat `string` which points to interner data,
498 /// as an immortal string, as long as this type never crosses between threads.
499 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
500 // by creating a new thread right after constructing the interner
501 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
502 pub struct LocalInternedString {
503 string: &'static str,
506 impl LocalInternedString {
507 pub fn as_interned_str(self) -> InternedString {
509 symbol: Symbol::intern(self.string)
514 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
516 str: ::std::convert::AsRef<U>
518 fn as_ref(&self) -> &U {
523 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
524 fn eq(&self, other: &T) -> bool {
525 self.string == other.deref()
529 impl ::std::cmp::PartialEq<LocalInternedString> for str {
530 fn eq(&self, other: &LocalInternedString) -> bool {
535 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
536 fn eq(&self, other: &LocalInternedString) -> bool {
537 *self == other.string
541 impl ::std::cmp::PartialEq<LocalInternedString> for String {
542 fn eq(&self, other: &LocalInternedString) -> bool {
547 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
548 fn eq(&self, other: &LocalInternedString) -> bool {
549 *self == other.string
553 impl !Send for LocalInternedString {}
554 impl !Sync for LocalInternedString {}
556 impl ::std::ops::Deref for LocalInternedString {
558 fn deref(&self) -> &str { self.string }
561 impl fmt::Debug for LocalInternedString {
562 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
563 fmt::Debug::fmt(self.string, f)
567 impl fmt::Display for LocalInternedString {
568 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
569 fmt::Display::fmt(self.string, f)
573 impl Decodable for LocalInternedString {
574 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
575 Ok(Symbol::intern(&d.read_str()?).as_str())
579 impl Encodable for LocalInternedString {
580 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
581 s.emit_str(self.string)
585 /// Represents a string stored in the string interner
586 #[derive(Clone, Copy, Eq)]
587 pub struct InternedString {
591 impl InternedString {
592 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
593 let str = with_interner(|interner| {
594 interner.get(self.symbol) as *const str
596 // This is safe because the interner keeps string alive until it is dropped.
597 // We can access it because we know the interner is still alive since we use a
598 // scoped thread local to access it, and it was alive at the beginning of this scope
602 pub fn as_symbol(self) -> Symbol {
606 pub fn as_str(self) -> LocalInternedString {
611 impl Hash for InternedString {
612 fn hash<H: Hasher>(&self, state: &mut H) {
613 self.with(|str| str.hash(state))
617 impl PartialOrd<InternedString> for InternedString {
618 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
619 if self.symbol == other.symbol {
620 return Some(Ordering::Equal);
622 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
626 impl Ord for InternedString {
627 fn cmp(&self, other: &InternedString) -> Ordering {
628 if self.symbol == other.symbol {
629 return Ordering::Equal;
631 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
635 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
636 fn eq(&self, other: &T) -> bool {
637 self.with(|string| string == other.deref())
641 impl PartialEq<InternedString> for InternedString {
642 fn eq(&self, other: &InternedString) -> bool {
643 self.symbol == other.symbol
647 impl PartialEq<InternedString> for str {
648 fn eq(&self, other: &InternedString) -> bool {
649 other.with(|string| self == string)
653 impl<'a> PartialEq<InternedString> for &'a str {
654 fn eq(&self, other: &InternedString) -> bool {
655 other.with(|string| *self == string)
659 impl PartialEq<InternedString> for String {
660 fn eq(&self, other: &InternedString) -> bool {
661 other.with(|string| self == string)
665 impl<'a> PartialEq<InternedString> for &'a String {
666 fn eq(&self, other: &InternedString) -> bool {
667 other.with(|string| *self == string)
671 impl ::std::convert::From<InternedString> for String {
672 fn from(val: InternedString) -> String {
673 val.as_symbol().to_string()
677 impl fmt::Debug for InternedString {
678 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
679 self.with(|str| fmt::Debug::fmt(&str, f))
683 impl fmt::Display for InternedString {
684 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
685 self.with(|str| fmt::Display::fmt(&str, f))
689 impl Decodable for InternedString {
690 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
691 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
695 impl Encodable for InternedString {
696 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
697 self.with(|string| s.emit_str(string))
707 fn interner_tests() {
708 let mut i: Interner = Interner::new();
709 // first one is zero:
710 assert_eq!(i.intern("dog"), Symbol(0));
711 // re-use gets the same entry:
712 assert_eq!(i.intern("dog"), Symbol(0));
713 // different string gets a different #:
714 assert_eq!(i.intern("cat"), Symbol(1));
715 assert_eq!(i.intern("cat"), Symbol(1));
716 // dog is still at zero
717 assert_eq!(i.intern("dog"), Symbol(0));
718 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
719 // gensym of same string gets new number :
720 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
721 // gensym of *existing* string gets new number:
722 assert_eq!(i.gensym("dog"), Symbol(4294967293));
726 fn without_first_quote_test() {
727 GLOBALS.set(&Globals::new(), || {
728 let i = Ident::from_str("'break");
729 assert_eq!(i.without_first_quote().name, keywords::Break.name());