1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 use rustc_data_structures::fx::FxHashMap;
19 use arena::DroplessArena;
20 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
24 use std::hash::{Hash, Hasher};
26 #[derive(Copy, Clone, Eq)]
34 pub const fn new(name: Symbol, span: Span) -> Ident {
38 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
39 Ident::new(name, DUMMY_SP)
42 /// Maps an interned string to an identifier with an empty syntax context.
43 pub fn from_interned_str(string: InternedString) -> Ident {
44 Ident::with_empty_ctxt(string.as_symbol())
47 /// Maps a string to an identifier with an empty syntax context.
48 pub fn from_str(string: &str) -> Ident {
49 Ident::with_empty_ctxt(Symbol::intern(string))
52 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
53 pub fn with_span_pos(self, span: Span) -> Ident {
54 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
57 pub fn without_first_quote(self) -> Ident {
58 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
61 /// "Normalize" ident for use in comparisons using "item hygiene".
62 /// Identifiers with same string value become same if they came from the same "modern" macro
63 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
64 /// different "modern" macros.
65 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
66 pub fn modern(self) -> Ident {
67 Ident::new(self.name, self.span.modern())
70 /// "Normalize" ident for use in comparisons using "local variable hygiene".
71 /// Identifiers with same string value become same if they came from the same non-transparent
72 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
73 /// non-transparent macros.
74 /// Technically, this operation strips all transparent marks from ident's syntactic context.
75 pub fn modern_and_legacy(self) -> Ident {
76 Ident::new(self.name, self.span.modern_and_legacy())
79 pub fn gensym(self) -> Ident {
80 Ident::new(self.name.gensymed(), self.span)
83 pub fn gensym_if_underscore(self) -> Ident {
84 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
87 pub fn as_str(self) -> LocalInternedString {
91 pub fn as_interned_str(self) -> InternedString {
92 self.name.as_interned_str()
// Two identifiers are equal when both their names and the syntax contexts of
// their spans match; no other part of the span takes part in the comparison.
96 impl PartialEq for Ident {
97 fn eq(&self, rhs: &Self) -> bool {
98 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
// Hashes the name and the span's syntax context, the same two components that
// the `PartialEq` impl for `Ident` compares.
102 impl Hash for Ident {
103 fn hash<H: Hasher>(&self, state: &mut H) {
104 self.name.hash(state);
105 self.span.ctxt().hash(state);
109 impl fmt::Debug for Ident {
110 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
111 write!(f, "{}{:?}", self.name, self.span.ctxt())
115 impl fmt::Display for Ident {
116 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
117 fmt::Display::fmt(&self.name, f)
121 impl Encodable for Ident {
122 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
123 if self.span.ctxt().modern() == SyntaxContext::empty() {
124 s.emit_str(&self.as_str())
125 } else { // FIXME(jseyfried) intercrate hygiene
126 let mut string = "#".to_owned();
127 string.push_str(&self.as_str());
// Decoding reads a single string. A leading `#` marks an identifier that is
// rebuilt as a gensym of the remaining text; any other string is interned
// as-is. Both paths produce an identifier with an empty syntax context.
133 impl Decodable for Ident {
134 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
135 let string = d.read_str()?;
136 Ok(if !string.starts_with('#') {
137 Ident::from_str(&string)
138 } else { // FIXME(jseyfried) intercrate hygiene
139 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
144 /// A symbol is an interned or gensymed string.
145 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
146 pub struct Symbol(u32);
148 // The interner is pointed to by a thread local value which is only set on the main thread
149 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
150 // to avoid panics and other errors, even though it would be memory safe to do so.
151 #[cfg(not(parallel_queries))]
152 impl !Send for Symbol { }
153 #[cfg(not(parallel_queries))]
154 impl !Sync for Symbol { }
157 /// Maps a string to its interned representation.
158 pub fn intern(string: &str) -> Self {
159 with_interner(|interner| interner.intern(string))
162 pub fn interned(self) -> Self {
163 with_interner(|interner| interner.interned(self))
166 /// Gensyms a new symbol for the given string, using the current interner.
167 pub fn gensym(string: &str) -> Self {
168 with_interner(|interner| interner.gensym(string))
171 pub fn gensymed(self) -> Self {
172 with_interner(|interner| interner.gensymed(self))
/// Returns the string for this symbol, wrapped in a `LocalInternedString`.
175 pub fn as_str(self) -> LocalInternedString {
176 with_interner(|interner| unsafe {
177 LocalInternedString {
// The same-type transmute only widens the borrow's lifetime to the `'static`
// that the `string` field of `LocalInternedString` requires.
// NOTE(review): soundness relies on the interner outliving every use of the
// returned value -- confirm against how `GLOBALS` is set up.
178 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
183 pub fn as_interned_str(self) -> InternedString {
184 with_interner(|interner| InternedString {
185 symbol: interner.interned(self)
189 pub fn as_u32(self) -> u32 {
194 impl fmt::Debug for Symbol {
195 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
196 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
198 write!(f, "{}({})", self, self.0)
200 write!(f, "{}", self)
205 impl fmt::Display for Symbol {
206 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207 fmt::Display::fmt(&self.as_str(), f)
211 impl Encodable for Symbol {
212 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
213 s.emit_str(&self.as_str())
217 impl Decodable for Symbol {
218 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
219 Ok(Symbol::intern(&d.read_str()?))
223 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
224 fn eq(&self, other: &T) -> bool {
225 self.as_str() == other.deref()
229 // The &'static strs in this type actually point into the arena
231 pub struct Interner {
232 arena: DroplessArena,
233 names: FxHashMap<&'static str, Symbol>,
234 strings: Vec<&'static str>,
235 gensyms: Vec<Symbol>,
239 fn prefill(init: &[&str]) -> Self {
240 let mut this = Interner::default();
241 for &string in init {
243 // We can't allocate empty strings in the arena, so handle this here
244 let name = Symbol(this.strings.len() as u32);
245 this.names.insert("", name);
246 this.strings.push("");
254 pub fn intern(&mut self, string: &str) -> Symbol {
255 if let Some(&name) = self.names.get(string) {
259 let name = Symbol(self.strings.len() as u32);
261 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
262 let string: &str = unsafe {
263 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
265 // It is safe to extend the arena allocation to 'static because we only access
266 // these while the arena is still alive
267 let string: &'static str = unsafe {
268 &*(string as *const str)
270 self.strings.push(string);
271 self.names.insert(string, name);
275 pub fn interned(&self, symbol: Symbol) -> Symbol {
276 if (symbol.0 as usize) < self.strings.len() {
279 self.interned(self.gensyms[(!0 - symbol.0) as usize])
// Interns `string`, then allocates a fresh gensym that refers to the interned symbol.
283 fn gensym(&mut self, string: &str) -> Symbol {
284 let symbol = self.intern(string);
285 self.gensymed(symbol)
// Records `symbol` in `gensyms` and returns a new tag counted down from `!0`
// (the largest `u32`): the first gensym is `!0`, the second `!0 - 1`, and so on.
// A gensym with tag `t` is therefore stored at `gensyms[!0 - t]`.
288 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
289 self.gensyms.push(symbol);
// `len()` is read after the push, hence the `+ 1` to make the first tag `!0`.
290 Symbol(!0 - self.gensyms.len() as u32 + 1)
// A tag at or beyond `strings.len()` does not index an interned string and is
// treated as a gensym.
// NOTE(review): takes `&mut self` although nothing in the visible body mutates
// the interner; `&self` looks sufficient -- confirm before changing.
293 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
294 symbol.0 as usize >= self.strings.len()
// Resolves `symbol` to its string. A tag that indexes `strings` is returned
// directly; any other tag is looked up as a gensym at `gensyms[!0 - tag]` and
// the symbol found there is resolved recursively. The `gensyms` lookup uses
// plain indexing, so an out-of-range tag panics.
297 pub fn get(&self, symbol: Symbol) -> &str {
298 match self.strings.get(symbol.0 as usize) {
299 Some(string) => string,
300 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
305 // In this macro, there is the requirement that the name (the number) must be monotonically
306 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
307 // except starting from the next number instead of zero.
308 macro_rules! declare_keywords {(
309 $( ($index: expr, $konst: ident, $string: expr) )*
312 use super::{Symbol, Ident};
313 #[derive(Clone, Copy, PartialEq, Eq)]
318 #[inline] pub fn ident(self) -> Ident { self.ident }
319 #[inline] pub fn name(self) -> Symbol { self.ident.name }
322 #[allow(non_upper_case_globals)]
323 pub const $konst: Keyword = Keyword {
324 ident: Ident::with_empty_ctxt(super::Symbol($index))
328 impl ::std::str::FromStr for Keyword {
331 fn from_str(s: &str) -> Result<Self, ()> {
333 $($string => Ok($konst),)*
341 pub fn fresh() -> Self {
342 Interner::prefill(&[$($string,)*])
347 // NB: leaving holes in the ident table is bad! A different ident will get
348 // interned with the id from the hole, but it will be between the min and max
349 // of the reserved words, and thus tagged as "reserved".
350 // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
351 // this should rarely be necessary, though, if the keywords are kept in alphabetic order.
353 // Special reserved identifiers used internally for elided lifetimes,
354 // unnamed method parameters, crate root module, error recovery etc.
356 (1, PathRoot, "{{root}}")
357 (2, DollarCrate, "$crate")
360 // Keywords that are used in stable Rust.
365 (8, Continue, "continue")
369 (12, Extern, "extern")
384 (27, Return, "return")
385 (28, SelfLower, "self")
386 (29, SelfUpper, "Self")
387 (30, Static, "static")
388 (31, Struct, "struct")
393 (36, Unsafe, "unsafe")
398 // Keywords that are used in unstable Rust or reserved for future use.
399 (40, Abstract, "abstract")
400 (41, Become, "become")
404 (45, Override, "override")
406 (47, Typeof, "typeof")
407 (48, Unsized, "unsized")
408 (49, Virtual, "virtual")
411 // Edition-specific keywords that are used in stable Rust.
412 (51, Dyn, "dyn") // >= 2018 Edition only
414 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
415 (52, Async, "async") // >= 2018 Edition only
416 (53, Try, "try") // >= 2018 Edition only
418 // Special lifetime names
419 (54, UnderscoreLifetime, "'_")
420 (55, StaticLifetime, "'static")
422 // Weak keywords, have special meaning only in specific contexts.
425 (58, Default, "default")
426 (59, Existential, "existential")
431 fn is_used_keyword_2018(self) -> bool {
432 self == keywords::Dyn.name()
435 fn is_unused_keyword_2018(self) -> bool {
436 self >= keywords::Async.name() && self <= keywords::Try.name()
441 // Returns true for reserved identifiers used internally for elided lifetimes,
442 // unnamed method parameters, crate root module, error recovery etc.
443 pub fn is_special(self) -> bool {
444 self.name <= keywords::Underscore.name()
447 /// Returns `true` if the token is a keyword used in the language.
448 pub fn is_used_keyword(self) -> bool {
449 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
450 self.name >= keywords::As.name() && self.name <= keywords::While.name() ||
451 self.name.is_used_keyword_2018() && self.span.rust_2018()
454 /// Returns `true` if the token is a keyword reserved for possible future use.
455 pub fn is_unused_keyword(self) -> bool {
456 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
457 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
458 self.name.is_unused_keyword_2018() && self.span.rust_2018()
461 /// Returns `true` if the token is either a special identifier or a keyword.
462 pub fn is_reserved(self) -> bool {
463 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
466 /// A keyword or reserved identifier that can be used as a path segment.
467 pub fn is_path_segment_keyword(self) -> bool {
468 self.name == keywords::Super.name() ||
469 self.name == keywords::SelfLower.name() ||
470 self.name == keywords::SelfUpper.name() ||
471 self.name == keywords::Extern.name() ||
472 self.name == keywords::Crate.name() ||
473 self.name == keywords::PathRoot.name() ||
474 self.name == keywords::DollarCrate.name()
477 // We see this identifier in a normal identifier position, like variable name or a type.
478 // How was it written originally? Did it use the raw form? Let's try to guess.
479 pub fn is_raw_guess(self) -> bool {
480 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
481 self.is_reserved() && !self.is_path_segment_keyword()
485 // Runs `f` with mutable access to the interner held in `GLOBALS` (via `symbol_interner.lock()`).
487 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
488 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
491 /// Represents a string stored in the interner. Because the interner outlives any thread
492 /// which uses this type, we can safely treat `string` which points to interner data,
493 /// as an immortal string, as long as this type never crosses between threads.
494 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
495 // by creating a new thread right after constructing the interner
496 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
497 pub struct LocalInternedString {
498 string: &'static str,
501 impl LocalInternedString {
502 pub fn as_interned_str(self) -> InternedString {
504 symbol: Symbol::intern(self.string)
508 pub fn get(&self) -> &'static str {
513 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
515 str: ::std::convert::AsRef<U>
517 fn as_ref(&self) -> &U {
522 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
523 fn eq(&self, other: &T) -> bool {
524 self.string == other.deref()
528 impl ::std::cmp::PartialEq<LocalInternedString> for str {
529 fn eq(&self, other: &LocalInternedString) -> bool {
534 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
535 fn eq(&self, other: &LocalInternedString) -> bool {
536 *self == other.string
540 impl ::std::cmp::PartialEq<LocalInternedString> for String {
541 fn eq(&self, other: &LocalInternedString) -> bool {
546 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
547 fn eq(&self, other: &LocalInternedString) -> bool {
548 *self == other.string
552 impl !Send for LocalInternedString {}
553 impl !Sync for LocalInternedString {}
555 impl ::std::ops::Deref for LocalInternedString {
557 fn deref(&self) -> &str { self.string }
560 impl fmt::Debug for LocalInternedString {
561 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
562 fmt::Debug::fmt(self.string, f)
566 impl fmt::Display for LocalInternedString {
567 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
568 fmt::Display::fmt(self.string, f)
572 impl Decodable for LocalInternedString {
573 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
574 Ok(Symbol::intern(&d.read_str()?).as_str())
578 impl Encodable for LocalInternedString {
579 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
580 s.emit_str(self.string)
584 /// Represents a string stored in the string interner
585 #[derive(Clone, Copy, Eq)]
586 pub struct InternedString {
590 impl InternedString {
591 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
592 let str = with_interner(|interner| {
593 interner.get(self.symbol) as *const str
595 // This is safe because the interner keeps string alive until it is dropped.
596 // We can access it because we know the interner is still alive since we use a
597 // scoped thread local to access it, and it was alive at the beginning of this scope
601 pub fn as_symbol(self) -> Symbol {
605 pub fn as_str(self) -> LocalInternedString {
// Hashes the string contents obtained through `with`, not the symbol's numeric tag.
610 impl Hash for InternedString {
611 fn hash<H: Hasher>(&self, state: &mut H) {
612 self.with(|str| str.hash(state))
// Ordering is by string contents. Identical symbols short-circuit to `Equal`
// without touching the interner; otherwise both strings are fetched through
// `with` and compared.
616 impl PartialOrd<InternedString> for InternedString {
617 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
618 if self.symbol == other.symbol {
619 return Some(Ordering::Equal);
621 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
// Total ordering by string contents, with the same fast path as `partial_cmp`:
// identical symbols compare `Equal` without fetching either string.
625 impl Ord for InternedString {
626 fn cmp(&self, other: &InternedString) -> Ordering {
627 if self.symbol == other.symbol {
628 return Ordering::Equal;
630 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
634 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
635 fn eq(&self, other: &T) -> bool {
636 self.with(|string| string == other.deref())
640 impl PartialEq<InternedString> for InternedString {
641 fn eq(&self, other: &InternedString) -> bool {
642 self.symbol == other.symbol
646 impl PartialEq<InternedString> for str {
647 fn eq(&self, other: &InternedString) -> bool {
648 other.with(|string| self == string)
652 impl<'a> PartialEq<InternedString> for &'a str {
653 fn eq(&self, other: &InternedString) -> bool {
654 other.with(|string| *self == string)
658 impl PartialEq<InternedString> for String {
659 fn eq(&self, other: &InternedString) -> bool {
660 other.with(|string| self == string)
664 impl<'a> PartialEq<InternedString> for &'a String {
665 fn eq(&self, other: &InternedString) -> bool {
666 other.with(|string| *self == string)
670 impl ::std::convert::From<InternedString> for String {
671 fn from(val: InternedString) -> String {
672 val.as_symbol().to_string()
676 impl fmt::Debug for InternedString {
677 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
678 self.with(|str| fmt::Debug::fmt(&str, f))
682 impl fmt::Display for InternedString {
683 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
684 self.with(|str| fmt::Display::fmt(&str, f))
688 impl Decodable for InternedString {
689 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
690 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
694 impl Encodable for InternedString {
695 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
696 self.with(|string| s.emit_str(string))
706 fn interner_tests() {
707 let mut i: Interner = Interner::default();
708 // first one is zero:
709 assert_eq!(i.intern("dog"), Symbol(0));
710 // re-use gets the same entry:
711 assert_eq!(i.intern("dog"), Symbol(0));
712 // different string gets a different #:
713 assert_eq!(i.intern("cat"), Symbol(1));
714 assert_eq!(i.intern("cat"), Symbol(1));
715 // dog is still at zero
716 assert_eq!(i.intern("dog"), Symbol(0));
717 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
718 // gensym of same string gets new number :
719 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
720 // gensym of *existing* string gets new number:
721 assert_eq!(i.gensym("dog"), Symbol(4294967293));
725 fn without_first_quote_test() {
726 GLOBALS.set(&Globals::new(), || {
727 let i = Ident::from_str("'break");
728 assert_eq!(i.without_first_quote().name, keywords::Break.name());