1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
//! An "interner" is a data structure that associates values with usize tags and
//! allows bidirectional lookup; i.e., given a value, one can easily find the
//! tag, and vice versa.
16 use hygiene::SyntaxContext;
17 use {Span, DUMMY_SP, GLOBALS};
19 use rustc_data_structures::fx::FxHashMap;
20 use arena::DroplessArena;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
24 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
25 use std::hash::{Hash, Hasher};
27 #[derive(Copy, Clone, Eq)]
35 pub const fn new(name: Symbol, span: Span) -> Ident {
39 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
40 Ident::new(name, DUMMY_SP)
43 /// Maps an interned string to an identifier with an empty syntax context.
44 pub fn from_interned_str(string: InternedString) -> Ident {
45 Ident::with_empty_ctxt(string.as_symbol())
48 /// Maps a string to an identifier with an empty syntax context.
49 pub fn from_str(string: &str) -> Ident {
50 Ident::with_empty_ctxt(Symbol::intern(string))
53 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
54 pub fn with_span_pos(self, span: Span) -> Ident {
55 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
58 pub fn without_first_quote(self) -> Ident {
59 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
62 /// "Normalize" ident for use in comparisons using "item hygiene".
63 /// Identifiers with same string value become same if they came from the same "modern" macro
64 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
65 /// different "modern" macros.
66 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
67 pub fn modern(self) -> Ident {
68 Ident::new(self.name, self.span.modern())
71 /// "Normalize" ident for use in comparisons using "local variable hygiene".
72 /// Identifiers with same string value become same if they came from the same non-transparent
73 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
74 /// non-transparent macros.
75 /// Technically, this operation strips all transparent marks from ident's syntactic context.
76 pub fn modern_and_legacy(self) -> Ident {
77 Ident::new(self.name, self.span.modern_and_legacy())
80 pub fn gensym(self) -> Ident {
81 Ident::new(self.name.gensymed(), self.span)
84 pub fn as_str(self) -> LocalInternedString {
88 pub fn as_interned_str(self) -> InternedString {
89 self.name.as_interned_str()
93 impl PartialEq for Ident {
94 fn eq(&self, rhs: &Self) -> bool {
95 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
100 fn hash<H: Hasher>(&self, state: &mut H) {
101 self.name.hash(state);
102 self.span.ctxt().hash(state);
106 impl fmt::Debug for Ident {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 write!(f, "{}{:?}", self.name, self.span.ctxt())
112 impl fmt::Display for Ident {
113 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114 fmt::Display::fmt(&self.name, f)
118 impl Encodable for Ident {
119 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
120 if self.span.ctxt().modern() == SyntaxContext::empty() {
121 s.emit_str(&self.as_str())
122 } else { // FIXME(jseyfried) intercrate hygiene
123 let mut string = "#".to_owned();
124 string.push_str(&self.as_str());
130 impl Decodable for Ident {
131 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
132 let string = d.read_str()?;
133 Ok(if !string.starts_with('#') {
134 Ident::from_str(&string)
135 } else { // FIXME(jseyfried) intercrate hygiene
136 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
141 /// A symbol is an interned or gensymed string.
142 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
143 pub struct Symbol(u32);
// The interner is pointed to by a thread local value which is only set on the main thread
// when parallelization is disabled. So we don't allow Symbol to transfer between threads
// to avoid panics and other errors, even though it would be memory safe to do so.
148 #[cfg(not(parallel_queries))]
149 impl !Send for Symbol { }
150 #[cfg(not(parallel_queries))]
151 impl !Sync for Symbol { }
154 /// Maps a string to its interned representation.
155 pub fn intern(string: &str) -> Self {
156 with_interner(|interner| interner.intern(string))
159 pub fn interned(self) -> Self {
160 with_interner(|interner| interner.interned(self))
163 /// gensym's a new usize, using the current interner.
164 pub fn gensym(string: &str) -> Self {
165 with_interner(|interner| interner.gensym(string))
168 pub fn gensymed(self) -> Self {
169 with_interner(|interner| interner.gensymed(self))
172 pub fn as_str(self) -> LocalInternedString {
173 with_interner(|interner| unsafe {
174 LocalInternedString {
175 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
180 pub fn as_interned_str(self) -> InternedString {
181 with_interner(|interner| InternedString {
182 symbol: interner.interned(self)
186 pub fn as_u32(self) -> u32 {
191 impl fmt::Debug for Symbol {
192 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
195 write!(f, "{}({})", self, self.0)
197 write!(f, "{}", self)
202 impl fmt::Display for Symbol {
203 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
204 fmt::Display::fmt(&self.as_str(), f)
208 impl Encodable for Symbol {
209 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
210 s.emit_str(&self.as_str())
214 impl Decodable for Symbol {
215 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
216 Ok(Symbol::intern(&d.read_str()?))
220 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
221 fn eq(&self, other: &T) -> bool {
222 self.as_str() == other.deref()
226 // The &'static strs in this type actually point into the arena
228 pub struct Interner {
229 arena: DroplessArena,
230 names: FxHashMap<&'static str, Symbol>,
231 strings: Vec<&'static str>,
232 gensyms: Vec<Symbol>,
236 fn prefill(init: &[&str]) -> Self {
237 let mut this = Interner::default();
238 for &string in init {
240 // We can't allocate empty strings in the arena, so handle this here
241 let name = Symbol(this.strings.len() as u32);
242 this.names.insert("", name);
243 this.strings.push("");
251 pub fn intern(&mut self, string: &str) -> Symbol {
252 if let Some(&name) = self.names.get(string) {
256 let name = Symbol(self.strings.len() as u32);
258 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
259 let string: &str = unsafe {
260 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
262 // It is safe to extend the arena allocation to 'static because we only access
263 // these while the arena is still alive
264 let string: &'static str = unsafe {
265 &*(string as *const str)
267 self.strings.push(string);
268 self.names.insert(string, name);
272 pub fn interned(&self, symbol: Symbol) -> Symbol {
273 if (symbol.0 as usize) < self.strings.len() {
276 self.interned(self.gensyms[(!0 - symbol.0) as usize])
280 fn gensym(&mut self, string: &str) -> Symbol {
281 let symbol = self.intern(string);
282 self.gensymed(symbol)
285 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
286 self.gensyms.push(symbol);
287 Symbol(!0 - self.gensyms.len() as u32 + 1)
290 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
291 symbol.0 as usize >= self.strings.len()
294 pub fn get(&self, symbol: Symbol) -> &str {
295 match self.strings.get(symbol.0 as usize) {
296 Some(string) => string,
297 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
302 // In this macro, there is the requirement that the name (the number) must be monotonically
303 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
304 // except starting from the next number instead of zero.
305 macro_rules! declare_keywords {(
306 $( ($index: expr, $konst: ident, $string: expr) )*
309 use super::{Symbol, Ident};
310 #[derive(Clone, Copy, PartialEq, Eq)]
315 #[inline] pub fn ident(self) -> Ident { self.ident }
316 #[inline] pub fn name(self) -> Symbol { self.ident.name }
319 #[allow(non_upper_case_globals)]
320 pub const $konst: Keyword = Keyword {
321 ident: Ident::with_empty_ctxt(super::Symbol($index))
325 impl ::std::str::FromStr for Keyword {
328 fn from_str(s: &str) -> Result<Self, ()> {
330 $($string => Ok($konst),)*
338 pub fn fresh() -> Self {
339 Interner::prefill(&[$($string,)*])
344 // NB: leaving holes in the ident table is bad! a different ident will get
345 // interned with the id from the hole, but it will be between the min and max
346 // of the reserved words, and thus tagged as "reserved".
// After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
// this should rarely be necessary, though, if the keywords are kept in alphabetical order.
350 // Special reserved identifiers used internally for elided lifetimes,
351 // unnamed method parameters, crate root module, error recovery etc.
353 (1, CrateRoot, "{{root}}")
354 (2, DollarCrate, "$crate")
357 // Keywords used in the language.
362 (8, Continue, "continue")
366 (12, Extern, "extern")
381 (27, Return, "return")
382 (28, SelfValue, "self")
383 (29, SelfType, "Self")
384 (30, Static, "static")
385 (31, Struct, "struct")
390 (36, Unsafe, "unsafe")
395 // Keywords reserved for future use.
396 (40, Abstract, "abstract")
397 (41, Become, "become")
401 (45, Override, "override")
403 (47, Typeof, "typeof")
404 (48, Unsized, "unsized")
405 (49, Virtual, "virtual")
408 // Edition-specific keywords reserved for future use.
409 (51, Async, "async") // >= 2018 Edition only
410 (52, Dyn, "dyn") // >= 2018 Edition only
411 (53, Try, "try") // >= 2018 Edition only
413 // Special lifetime names
414 (54, UnderscoreLifetime, "'_")
415 (55, StaticLifetime, "'static")
417 // Weak keywords, have special meaning only in specific contexts.
420 (58, Default, "default")
422 (60, Existential, "existential")
426 fn is_unused_keyword_2018(self) -> bool {
427 self >= keywords::Async.name() && self <= keywords::Try.name()
432 // Returns true for reserved identifiers used internally for elided lifetimes,
433 // unnamed method parameters, crate root module, error recovery etc.
434 pub fn is_special(self) -> bool {
435 self.name <= keywords::Underscore.name()
438 /// Returns `true` if the token is a keyword used in the language.
439 pub fn is_used_keyword(self) -> bool {
440 self.name >= keywords::As.name() && self.name <= keywords::While.name()
443 /// Returns `true` if the token is a keyword reserved for possible future use.
444 pub fn is_unused_keyword(self) -> bool {
445 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
446 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
447 self.name.is_unused_keyword_2018() && self.span.edition() == Edition::Edition2018
450 /// Returns `true` if the token is either a special identifier or a keyword.
451 pub fn is_reserved(self) -> bool {
452 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
455 /// A keyword or reserved identifier that can be used as a path segment.
456 pub fn is_path_segment_keyword(self) -> bool {
457 self.name == keywords::Super.name() ||
458 self.name == keywords::SelfValue.name() ||
459 self.name == keywords::SelfType.name() ||
460 self.name == keywords::Extern.name() ||
461 self.name == keywords::Crate.name() ||
462 self.name == keywords::CrateRoot.name() ||
463 self.name == keywords::DollarCrate.name()
466 // We see this identifier in a normal identifier position, like variable name or a type.
467 // How was it written originally? Did it use the raw form? Let's try to guess.
468 pub fn is_raw_guess(self) -> bool {
469 self.name != keywords::Invalid.name() &&
470 self.is_reserved() && !self.is_path_segment_keyword()
474 // If an interner exists, return it. Otherwise, prepare a fresh one.
476 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
477 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
480 /// Represents a string stored in the interner. Because the interner outlives any thread
481 /// which uses this type, we can safely treat `string` which points to interner data,
482 /// as an immortal string, as long as this type never crosses between threads.
483 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
484 // by creating a new thread right after constructing the interner
485 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
486 pub struct LocalInternedString {
487 string: &'static str,
490 impl LocalInternedString {
491 pub fn as_interned_str(self) -> InternedString {
493 symbol: Symbol::intern(self.string)
498 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
500 str: ::std::convert::AsRef<U>
502 fn as_ref(&self) -> &U {
507 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
508 fn eq(&self, other: &T) -> bool {
509 self.string == other.deref()
513 impl ::std::cmp::PartialEq<LocalInternedString> for str {
514 fn eq(&self, other: &LocalInternedString) -> bool {
519 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
520 fn eq(&self, other: &LocalInternedString) -> bool {
521 *self == other.string
525 impl ::std::cmp::PartialEq<LocalInternedString> for String {
526 fn eq(&self, other: &LocalInternedString) -> bool {
531 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
532 fn eq(&self, other: &LocalInternedString) -> bool {
533 *self == other.string
537 impl !Send for LocalInternedString {}
538 impl !Sync for LocalInternedString {}
540 impl ::std::ops::Deref for LocalInternedString {
542 fn deref(&self) -> &str { self.string }
545 impl fmt::Debug for LocalInternedString {
546 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
547 fmt::Debug::fmt(self.string, f)
551 impl fmt::Display for LocalInternedString {
552 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
553 fmt::Display::fmt(self.string, f)
557 impl Decodable for LocalInternedString {
558 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
559 Ok(Symbol::intern(&d.read_str()?).as_str())
563 impl Encodable for LocalInternedString {
564 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
565 s.emit_str(self.string)
569 /// Represents a string stored in the string interner
570 #[derive(Clone, Copy, Eq)]
571 pub struct InternedString {
575 impl InternedString {
576 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
577 let str = with_interner(|interner| {
578 interner.get(self.symbol) as *const str
580 // This is safe because the interner keeps string alive until it is dropped.
581 // We can access it because we know the interner is still alive since we use a
582 // scoped thread local to access it, and it was alive at the beginning of this scope
586 pub fn as_symbol(self) -> Symbol {
590 pub fn as_str(self) -> LocalInternedString {
595 impl Hash for InternedString {
596 fn hash<H: Hasher>(&self, state: &mut H) {
597 self.with(|str| str.hash(state))
601 impl PartialOrd<InternedString> for InternedString {
602 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
603 if self.symbol == other.symbol {
604 return Some(Ordering::Equal);
606 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
610 impl Ord for InternedString {
611 fn cmp(&self, other: &InternedString) -> Ordering {
612 if self.symbol == other.symbol {
613 return Ordering::Equal;
615 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
619 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
620 fn eq(&self, other: &T) -> bool {
621 self.with(|string| string == other.deref())
625 impl PartialEq<InternedString> for InternedString {
626 fn eq(&self, other: &InternedString) -> bool {
627 self.symbol == other.symbol
631 impl PartialEq<InternedString> for str {
632 fn eq(&self, other: &InternedString) -> bool {
633 other.with(|string| self == string)
637 impl<'a> PartialEq<InternedString> for &'a str {
638 fn eq(&self, other: &InternedString) -> bool {
639 other.with(|string| *self == string)
643 impl PartialEq<InternedString> for String {
644 fn eq(&self, other: &InternedString) -> bool {
645 other.with(|string| self == string)
649 impl<'a> PartialEq<InternedString> for &'a String {
650 fn eq(&self, other: &InternedString) -> bool {
651 other.with(|string| *self == string)
655 impl ::std::convert::From<InternedString> for String {
656 fn from(val: InternedString) -> String {
657 val.as_symbol().to_string()
661 impl fmt::Debug for InternedString {
662 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
663 self.with(|str| fmt::Debug::fmt(&str, f))
667 impl fmt::Display for InternedString {
668 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
669 self.with(|str| fmt::Display::fmt(&str, f))
673 impl Decodable for InternedString {
674 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
675 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
679 impl Encodable for InternedString {
680 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
681 self.with(|string| s.emit_str(string))
691 fn interner_tests() {
692 let mut i: Interner = Interner::default();
693 // first one is zero:
694 assert_eq!(i.intern("dog"), Symbol(0));
695 // re-use gets the same entry:
696 assert_eq!(i.intern("dog"), Symbol(0));
697 // different string gets a different #:
698 assert_eq!(i.intern("cat"), Symbol(1));
699 assert_eq!(i.intern("cat"), Symbol(1));
700 // dog is still at zero
701 assert_eq!(i.intern("dog"), Symbol(0));
702 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
703 // gensym of same string gets new number :
704 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
705 // gensym of *existing* string gets new number:
706 assert_eq!(i.gensym("dog"), Symbol(4294967293));
710 fn without_first_quote_test() {
711 GLOBALS.set(&Globals::new(), || {
712 let i = Ident::from_str("'break");
713 assert_eq!(i.without_first_quote().name, keywords::Break.name());