1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 use rustc_data_structures::fx::FxHashMap;
19 use arena::DroplessArena;
20 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
24 use std::hash::{Hash, Hasher};
26 #[derive(Copy, Clone, Eq)]
34 pub const fn new(name: Symbol, span: Span) -> Ident {
38 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
39 Ident::new(name, DUMMY_SP)
42 /// Maps an interned string to an identifier with an empty syntax context.
43 pub fn from_interned_str(string: InternedString) -> Ident {
44 Ident::with_empty_ctxt(string.as_symbol())
47 /// Maps a string to an identifier with an empty syntax context.
48 pub fn from_str(string: &str) -> Ident {
49 Ident::with_empty_ctxt(Symbol::intern(string))
52 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
53 pub fn with_span_pos(self, span: Span) -> Ident {
54 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
57 pub fn without_first_quote(self) -> Ident {
58 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
61 /// "Normalize" ident for use in comparisons using "item hygiene".
62 /// Identifiers with same string value become same if they came from the same "modern" macro
63 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
64 /// different "modern" macros.
65 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
66 pub fn modern(self) -> Ident {
67 Ident::new(self.name, self.span.modern())
70 /// "Normalize" ident for use in comparisons using "local variable hygiene".
71 /// Identifiers with same string value become same if they came from the same non-transparent
72 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
73 /// non-transparent macros.
74 /// Technically, this operation strips all transparent marks from ident's syntactic context.
75 pub fn modern_and_legacy(self) -> Ident {
76 Ident::new(self.name, self.span.modern_and_legacy())
79 pub fn gensym(self) -> Ident {
80 Ident::new(self.name.gensymed(), self.span)
83 pub fn as_str(self) -> LocalInternedString {
87 pub fn as_interned_str(self) -> InternedString {
88 self.name.as_interned_str()
92 impl PartialEq for Ident {
93 fn eq(&self, rhs: &Self) -> bool {
94 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
99 fn hash<H: Hasher>(&self, state: &mut H) {
100 self.name.hash(state);
101 self.span.ctxt().hash(state);
105 impl fmt::Debug for Ident {
106 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
107 write!(f, "{}{:?}", self.name, self.span.ctxt())
111 impl fmt::Display for Ident {
112 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113 fmt::Display::fmt(&self.name, f)
117 impl Encodable for Ident {
118 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
119 if self.span.ctxt().modern() == SyntaxContext::empty() {
120 s.emit_str(&self.as_str())
121 } else { // FIXME(jseyfried) intercrate hygiene
122 let mut string = "#".to_owned();
123 string.push_str(&self.as_str());
129 impl Decodable for Ident {
130 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
131 let string = d.read_str()?;
132 Ok(if !string.starts_with('#') {
133 Ident::from_str(&string)
134 } else { // FIXME(jseyfried) intercrate hygiene
135 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
140 /// A symbol is an interned or gensymed string.
141 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
142 pub struct Symbol(u32);
144 // The interner is pointed to by a thread local value which is only set on the main thread
145 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
146 // to avoid panics and other errors, even though it would be memory safe to do so.
147 #[cfg(not(parallel_queries))]
148 impl !Send for Symbol { }
149 #[cfg(not(parallel_queries))]
150 impl !Sync for Symbol { }
153 /// Maps a string to its interned representation.
154 pub fn intern(string: &str) -> Self {
155 with_interner(|interner| interner.intern(string))
158 pub fn interned(self) -> Self {
159 with_interner(|interner| interner.interned(self))
162 /// gensym's a new usize, using the current interner.
163 pub fn gensym(string: &str) -> Self {
164 with_interner(|interner| interner.gensym(string))
167 pub fn gensymed(self) -> Self {
168 with_interner(|interner| interner.gensymed(self))
171 pub fn as_str(self) -> LocalInternedString {
172 with_interner(|interner| unsafe {
173 LocalInternedString {
174 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
179 pub fn as_interned_str(self) -> InternedString {
180 with_interner(|interner| InternedString {
181 symbol: interner.interned(self)
185 pub fn as_u32(self) -> u32 {
190 impl fmt::Debug for Symbol {
191 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
192 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
194 write!(f, "{}({})", self, self.0)
196 write!(f, "{}", self)
201 impl fmt::Display for Symbol {
202 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
203 fmt::Display::fmt(&self.as_str(), f)
207 impl Encodable for Symbol {
208 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
209 s.emit_str(&self.as_str())
213 impl Decodable for Symbol {
214 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
215 Ok(Symbol::intern(&d.read_str()?))
219 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
220 fn eq(&self, other: &T) -> bool {
221 self.as_str() == other.deref()
225 // The &'static strs in this type actually point into the arena
227 pub struct Interner {
228 arena: DroplessArena,
229 names: FxHashMap<&'static str, Symbol>,
230 strings: Vec<&'static str>,
231 gensyms: Vec<Symbol>,
235 fn prefill(init: &[&str]) -> Self {
236 let mut this = Interner::default();
237 for &string in init {
239 // We can't allocate empty strings in the arena, so handle this here
240 let name = Symbol(this.strings.len() as u32);
241 this.names.insert("", name);
242 this.strings.push("");
250 pub fn intern(&mut self, string: &str) -> Symbol {
251 if let Some(&name) = self.names.get(string) {
255 let name = Symbol(self.strings.len() as u32);
257 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
258 let string: &str = unsafe {
259 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
261 // It is safe to extend the arena allocation to 'static because we only access
262 // these while the arena is still alive
263 let string: &'static str = unsafe {
264 &*(string as *const str)
266 self.strings.push(string);
267 self.names.insert(string, name);
271 pub fn interned(&self, symbol: Symbol) -> Symbol {
272 if (symbol.0 as usize) < self.strings.len() {
275 self.interned(self.gensyms[(!0 - symbol.0) as usize])
279 fn gensym(&mut self, string: &str) -> Symbol {
280 let symbol = self.intern(string);
281 self.gensymed(symbol)
284 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
285 self.gensyms.push(symbol);
286 Symbol(!0 - self.gensyms.len() as u32 + 1)
289 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
290 symbol.0 as usize >= self.strings.len()
293 pub fn get(&self, symbol: Symbol) -> &str {
294 match self.strings.get(symbol.0 as usize) {
295 Some(string) => string,
296 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
301 // In this macro, there is the requirement that the name (the number) must be monotonically
302 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
303 // except starting from the next number instead of zero.
304 macro_rules! declare_keywords {(
305 $( ($index: expr, $konst: ident, $string: expr) )*
308 use super::{Symbol, Ident};
309 #[derive(Clone, Copy, PartialEq, Eq)]
314 #[inline] pub fn ident(self) -> Ident { self.ident }
315 #[inline] pub fn name(self) -> Symbol { self.ident.name }
318 #[allow(non_upper_case_globals)]
319 pub const $konst: Keyword = Keyword {
320 ident: Ident::with_empty_ctxt(super::Symbol($index))
324 impl ::std::str::FromStr for Keyword {
327 fn from_str(s: &str) -> Result<Self, ()> {
329 $($string => Ok($konst),)*
337 pub fn fresh() -> Self {
338 Interner::prefill(&[$($string,)*])
343 // NB: leaving holes in the ident table is bad! a different ident will get
344 // interned with the id from the hole, but it will be between the min and max
345 // of the reserved words, and thus tagged as "reserved".
346 // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
347 // this should rarely be necessary, though, if the keywords are kept in alphabetical order.
349 // Special reserved identifiers used internally for elided lifetimes,
350 // unnamed method parameters, crate root module, error recovery etc.
352 (1, PathRoot, "{{root}}")
353 (2, DollarCrate, "$crate")
356 // Keywords that are used in stable Rust.
361 (8, Continue, "continue")
365 (12, Extern, "extern")
380 (27, Return, "return")
381 (28, SelfLower, "self")
382 (29, SelfUpper, "Self")
383 (30, Static, "static")
384 (31, Struct, "struct")
389 (36, Unsafe, "unsafe")
394 // Keywords that are used in unstable Rust or reserved for future use.
395 (40, Abstract, "abstract")
396 (41, Become, "become")
400 (45, Override, "override")
402 (47, Typeof, "typeof")
403 (48, Unsized, "unsized")
404 (49, Virtual, "virtual")
407 // Edition-specific keywords that are used in stable Rust.
408 (51, Dyn, "dyn") // >= 2018 Edition only
410 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
411 (52, Async, "async") // >= 2018 Edition only
412 (53, Try, "try") // >= 2018 Edition only
414 // Special lifetime names
415 (54, UnderscoreLifetime, "'_")
416 (55, StaticLifetime, "'static")
418 // Weak keywords, have special meaning only in specific contexts.
421 (58, Default, "default")
422 (59, Existential, "existential")
427 fn is_used_keyword_2018(self) -> bool {
428 self == keywords::Dyn.name()
431 fn is_unused_keyword_2018(self) -> bool {
432 self >= keywords::Async.name() && self <= keywords::Try.name()
437 // Returns true for reserved identifiers used internally for elided lifetimes,
438 // unnamed method parameters, crate root module, error recovery etc.
439 pub fn is_special(self) -> bool {
440 self.name <= keywords::Underscore.name()
443 /// Returns `true` if the token is a keyword used in the language.
444 pub fn is_used_keyword(self) -> bool {
445 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
446 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
447 self.name.is_used_keyword_2018() && self.span.rust_2018()
450 /// Returns `true` if the token is a keyword reserved for possible future use.
451 pub fn is_unused_keyword(self) -> bool {
452 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
453 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
454 self.name.is_unused_keyword_2018() && self.span.rust_2018()
457 /// Returns `true` if the token is either a special identifier or a keyword.
458 pub fn is_reserved(self) -> bool {
459 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
462 /// A keyword or reserved identifier that can be used as a path segment.
463 pub fn is_path_segment_keyword(self) -> bool {
464 self.name == keywords::Super.name() ||
465 self.name == keywords::SelfLower.name() ||
466 self.name == keywords::SelfUpper.name() ||
467 self.name == keywords::Extern.name() ||
468 self.name == keywords::Crate.name() ||
469 self.name == keywords::PathRoot.name() ||
470 self.name == keywords::DollarCrate.name()
473 // We see this identifier in a normal identifier position, like variable name or a type.
474 // How was it written originally? Did it use the raw form? Let's try to guess.
475 pub fn is_raw_guess(self) -> bool {
476 self.name != keywords::Invalid.name() &&
477 self.is_reserved() && !self.is_path_segment_keyword()
481 // If an interner exists, return it. Otherwise, prepare a fresh one.
483 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
484 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
487 /// Represents a string stored in the interner. Because the interner outlives any thread
488 /// which uses this type, we can safely treat `string` which points to interner data,
489 /// as an immortal string, as long as this type never crosses between threads.
490 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
491 // by creating a new thread right after constructing the interner
492 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
493 pub struct LocalInternedString {
494 string: &'static str,
497 impl LocalInternedString {
498 pub fn as_interned_str(self) -> InternedString {
500 symbol: Symbol::intern(self.string)
504 pub fn get(&self) -> &'static str {
509 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
511 str: ::std::convert::AsRef<U>
513 fn as_ref(&self) -> &U {
518 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
519 fn eq(&self, other: &T) -> bool {
520 self.string == other.deref()
524 impl ::std::cmp::PartialEq<LocalInternedString> for str {
525 fn eq(&self, other: &LocalInternedString) -> bool {
530 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
531 fn eq(&self, other: &LocalInternedString) -> bool {
532 *self == other.string
536 impl ::std::cmp::PartialEq<LocalInternedString> for String {
537 fn eq(&self, other: &LocalInternedString) -> bool {
542 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
543 fn eq(&self, other: &LocalInternedString) -> bool {
544 *self == other.string
548 impl !Send for LocalInternedString {}
549 impl !Sync for LocalInternedString {}
551 impl ::std::ops::Deref for LocalInternedString {
553 fn deref(&self) -> &str { self.string }
556 impl fmt::Debug for LocalInternedString {
557 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
558 fmt::Debug::fmt(self.string, f)
562 impl fmt::Display for LocalInternedString {
563 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
564 fmt::Display::fmt(self.string, f)
568 impl Decodable for LocalInternedString {
569 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
570 Ok(Symbol::intern(&d.read_str()?).as_str())
574 impl Encodable for LocalInternedString {
575 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
576 s.emit_str(self.string)
580 /// Represents a string stored in the string interner
581 #[derive(Clone, Copy, Eq)]
582 pub struct InternedString {
586 impl InternedString {
587 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
588 let str = with_interner(|interner| {
589 interner.get(self.symbol) as *const str
591 // This is safe because the interner keeps string alive until it is dropped.
592 // We can access it because we know the interner is still alive since we use a
593 // scoped thread local to access it, and it was alive at the beginning of this scope
597 pub fn as_symbol(self) -> Symbol {
601 pub fn as_str(self) -> LocalInternedString {
606 impl Hash for InternedString {
607 fn hash<H: Hasher>(&self, state: &mut H) {
608 self.with(|str| str.hash(state))
612 impl PartialOrd<InternedString> for InternedString {
613 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
614 if self.symbol == other.symbol {
615 return Some(Ordering::Equal);
617 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
621 impl Ord for InternedString {
622 fn cmp(&self, other: &InternedString) -> Ordering {
623 if self.symbol == other.symbol {
624 return Ordering::Equal;
626 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
630 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
631 fn eq(&self, other: &T) -> bool {
632 self.with(|string| string == other.deref())
636 impl PartialEq<InternedString> for InternedString {
637 fn eq(&self, other: &InternedString) -> bool {
638 self.symbol == other.symbol
642 impl PartialEq<InternedString> for str {
643 fn eq(&self, other: &InternedString) -> bool {
644 other.with(|string| self == string)
648 impl<'a> PartialEq<InternedString> for &'a str {
649 fn eq(&self, other: &InternedString) -> bool {
650 other.with(|string| *self == string)
654 impl PartialEq<InternedString> for String {
655 fn eq(&self, other: &InternedString) -> bool {
656 other.with(|string| self == string)
660 impl<'a> PartialEq<InternedString> for &'a String {
661 fn eq(&self, other: &InternedString) -> bool {
662 other.with(|string| *self == string)
666 impl ::std::convert::From<InternedString> for String {
667 fn from(val: InternedString) -> String {
668 val.as_symbol().to_string()
672 impl fmt::Debug for InternedString {
673 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
674 self.with(|str| fmt::Debug::fmt(&str, f))
678 impl fmt::Display for InternedString {
679 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
680 self.with(|str| fmt::Display::fmt(&str, f))
684 impl Decodable for InternedString {
685 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
686 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
690 impl Encodable for InternedString {
691 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
692 self.with(|string| s.emit_str(string))
702 fn interner_tests() {
703 let mut i: Interner = Interner::default();
704 // first one is zero:
705 assert_eq!(i.intern("dog"), Symbol(0));
706 // re-use gets the same entry:
707 assert_eq!(i.intern("dog"), Symbol(0));
708 // different string gets a different #:
709 assert_eq!(i.intern("cat"), Symbol(1));
710 assert_eq!(i.intern("cat"), Symbol(1));
711 // dog is still at zero
712 assert_eq!(i.intern("dog"), Symbol(0));
713 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
714 // gensym of same string gets new number :
715 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
716 // gensym of *existing* string gets new number:
717 assert_eq!(i.gensym("dog"), Symbol(4294967293));
721 fn without_first_quote_test() {
722 GLOBALS.set(&Globals::new(), || {
723 let i = Ident::from_str("'break");
724 assert_eq!(i.without_first_quote().name, keywords::Break.name());