1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 use rustc_data_structures::fx::FxHashMap;
19 use arena::DroplessArena;
20 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
24 use std::hash::{Hash, Hasher};
26 #[derive(Copy, Clone, Eq)]
/// Constructs an identifier from a name and a span.
// NOTE(review): the body of `new` is not visible in this excerpt.
34 pub const fn new(name: Symbol, span: Span) -> Ident {
/// Constructs an identifier named `name` whose span is `DUMMY_SP`.
38 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
39 Ident::new(name, DUMMY_SP)
42 /// Maps an interned string to an identifier with an empty syntax context.
43 pub fn from_interned_str(string: InternedString) -> Ident {
44 Ident::with_empty_ctxt(string.as_symbol())
47 /// Maps a string to an identifier with an empty syntax context.
48 pub fn from_str(string: &str) -> Ident {
49 Ident::with_empty_ctxt(Symbol::intern(string))
52 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
53 pub fn with_span_pos(self, span: Span) -> Ident {
54 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
/// Returns an identifier with the same span whose name has its leading single-quote
/// characters removed. Note that `trim_left_matches` strips every leading quote, not
/// only the first one.
57 pub fn without_first_quote(self) -> Ident {
58 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
61 /// "Normalize" ident for use in comparisons using "item hygiene".
62 /// Identifiers with same string value become same if they came from the same "modern" macro
63 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
64 /// different "modern" macros.
65 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
66 pub fn modern(self) -> Ident {
67 Ident::new(self.name, self.span.modern())
70 /// "Normalize" ident for use in comparisons using "local variable hygiene".
71 /// Identifiers with same string value become same if they came from the same non-transparent
72 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
73 /// non-transparent macros.
74 /// Technically, this operation strips all transparent marks from ident's syntactic context.
75 pub fn modern_and_legacy(self) -> Ident {
76 Ident::new(self.name, self.span.modern_and_legacy())
/// Returns an identifier with the same span whose name is a gensym of the current name.
79 pub fn gensym(self) -> Ident {
80 Ident::new(self.name.gensymed(), self.span)
/// Gensyms the identifier only when its name is `keywords::Underscore`; any other
/// identifier is returned unchanged.
83 pub fn gensym_if_underscore(self) -> Ident {
84 if self.name == keywords::Underscore.name() { self.gensym() } else { self }
/// Returns a `LocalInternedString` for this identifier.
// NOTE(review): body not visible in this excerpt; presumably forwards to `self.name`.
87 pub fn as_str(self) -> LocalInternedString {
/// Returns the name of this identifier as an `InternedString`.
91 pub fn as_interned_str(self) -> InternedString {
92 self.name.as_interned_str()
// Equality is defined on the name and on the syntax context of the span only; the rest
// of the span takes no part in the comparison.
96 impl PartialEq for Ident {
97 fn eq(&self, rhs: &Self) -> bool {
98 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
// Hashes the same two components that `eq` compares (name, then syntax context), which
// keeps `Hash` consistent with `PartialEq`.
102 impl Hash for Ident {
103 fn hash<H: Hasher>(&self, state: &mut H) {
104 self.name.hash(state);
105 self.span.ctxt().hash(state);
// Debug output is the name (via `Display`) immediately followed by the `Debug` form of
// the syntax context of the span.
109 impl fmt::Debug for Ident {
110 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
111 write!(f, "{}{:?}", self.name, self.span.ctxt())
// Display output is the name alone.
115 impl fmt::Display for Ident {
116 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
117 fmt::Display::fmt(&self.name, f)
// Serializes the identifier as a string. When the `modern()` form of the syntax context
// is empty the plain name is emitted; otherwise a string starting with `#` followed by
// the name is built.
// NOTE(review): the line that emits the `#`-prefixed string is not visible in this excerpt.
121 impl Encodable for Ident {
122 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
123 if self.span.ctxt().modern() == SyntaxContext::empty() {
124 s.emit_str(&self.as_str())
125 } else { // FIXME(jseyfried) intercrate hygiene
126 let mut string = "#".to_owned();
127 string.push_str(&self.as_str());
// Reverse of the encoding above: a string without a leading `#` becomes a plain
// identifier, while a `#`-prefixed string becomes a gensym of the text after the `#`.
// Both results carry an empty syntax context.
133 impl Decodable for Ident {
134 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
135 let string = d.read_str()?;
136 Ok(if !string.starts_with('#') {
137 Ident::from_str(&string)
138 } else { // FIXME(jseyfried) intercrate hygiene
// `[1..]` skips the one-byte `#` marker.
139 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
144 /// A symbol is an interned or gensymed string.
///
/// The wrapped `u32` is the tag handed out by the `Interner`: interned strings are
/// numbered by their position in the string table, and gensym tags count down from
/// `u32::MAX`.
145 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
146 pub struct Symbol(u32);
148 // The interner is pointed to by a thread local value which is only set on the main thread
149 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
150 // to avoid panics and other errors, even though it would be memory safe to do so.
151 #[cfg(not(parallel_queries))]
152 impl !Send for Symbol { }
153 #[cfg(not(parallel_queries))]
154 impl !Sync for Symbol { }
157 /// Maps a string to its interned representation.
158 pub fn intern(string: &str) -> Self {
159 with_interner(|interner| interner.intern(string))
/// Forwards to `Interner::interned` on the current interner, which follows a gensym back
/// to the symbol it was created from.
162 pub fn interned(self) -> Self {
163 with_interner(|interner| interner.interned(self))
166 /// Creates a fresh gensym for `string`, using the current interner.
167 pub fn gensym(string: &str) -> Self {
168 with_interner(|interner| interner.gensym(string))
/// Creates a fresh gensym for this symbol, using the current interner.
171 pub fn gensymed(self) -> Self {
172 with_interner(|interner| interner.gensymed(self))
/// Returns the string of this symbol as a `LocalInternedString`.
175 pub fn as_str(self) -> LocalInternedString {
// The `&str` to `&str` transmute changes nothing but the lifetime: it lets the reference
// obtained from the interner be stored in the static-lifetime `string` field.
176 with_interner(|interner| unsafe {
177 LocalInternedString {
178 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
/// Returns an `InternedString` holding the interned form of this symbol.
183 pub fn as_interned_str(self) -> InternedString {
184 with_interner(|interner| InternedString {
185 symbol: interner.interned(self)
/// Returns the symbol as a `u32`.
// NOTE(review): body not visible in this excerpt.
189 pub fn as_u32(self) -> u32 {
// Debug output has two shapes: `name(tag)` and plain `name`.
// NOTE(review): the `if`/`else` lines are missing from this excerpt; presumably the
// `name(tag)` shape is used when `is_gensymed` is true. Confirm against the full file.
194 impl fmt::Debug for Symbol {
195 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
196 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
198 write!(f, "{}({})", self, self.0)
200 write!(f, "{}", self)
// Display output is the string the symbol stands for.
205 impl fmt::Display for Symbol {
206 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
207 fmt::Display::fmt(&self.as_str(), f)
// Serializes the symbol as its string contents.
211 impl Encodable for Symbol {
212 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
213 s.emit_str(&self.as_str())
// Reads a string and interns it, so decoding always goes through `Symbol::intern`.
217 impl Decodable for Symbol {
218 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
219 Ok(Symbol::intern(&d.read_str()?))
// Compares the string contents of the symbol with any value that derefs to `str`.
223 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
224 fn eq(&self, other: &T) -> bool {
225 self.as_str() == other.deref()
229 // The &'static strs in this type actually point into the arena
231 pub struct Interner {
// Backing storage for the bytes of interned strings.
232 arena: DroplessArena,
// Reverse lookup: string contents to symbol.
233 names: FxHashMap<&'static str, Symbol>,
// Forward lookup: `strings[symbol.0]` is the text of an interned symbol.
234 strings: Vec<&'static str>,
// For each gensym, the symbol it was created from; a gensym `g` is stored at
// index `!0 - g.0`.
235 gensyms: Vec<Symbol>,
// Starts from `Interner::default()` and walks `init` in order.
// NOTE(review): several lines of this function are missing from the excerpt (the test
// selecting the empty-string path, the handling of other strings, and the return);
// the comments here cover only what is visible.
239 fn prefill(init: &[&str]) -> Self {
240 let mut this = Interner::default();
241 for &string in init {
243 // We can't allocate empty strings in the arena, so handle this here
// The next free index of `strings` becomes the symbol, and the empty string literal is
// registered directly in both tables instead of an arena copy.
244 let name = Symbol(this.strings.len() as u32);
245 this.names.insert("", name);
246 this.strings.push("");
// Interns `string`: looks it up in `names` first; for a new string the text is copied
// into the arena and registered under the next index of `strings`.
// NOTE(review): the early return for an existing entry and the final result line are
// not visible in this excerpt.
254 pub fn intern(&mut self, string: &str) -> Symbol {
255 if let Some(&name) = self.names.get(string) {
// A new symbol is numbered by the position its string will occupy in `strings`.
259 let name = Symbol(self.strings.len() as u32);
261 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
262 let string: &str = unsafe {
263 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
265 // It is safe to extend the arena allocation to 'static because we only access
266 // these while the arena is still alive
267 let string: &'static str = unsafe {
268 &*(string as *const str)
// Register the arena copy in both directions: index to string and string to symbol.
270 self.strings.push(string);
271 self.names.insert(string, name);
// Maps a symbol to its interned form. A tag that indexes into `strings` is already
// interned; any other tag is treated as a gensym and resolved through `gensyms`.
// NOTE(review): the line returning the symbol for the in-range case is not visible here.
275 pub fn interned(&self, symbol: Symbol) -> Symbol {
276 if (symbol.0 as usize) < self.strings.len() {
279 self.interned(self.gensyms[(!0 - symbol.0) as usize])
// Interns `string` and returns a fresh gensym for the resulting symbol.
283 fn gensym(&mut self, string: &str) -> Symbol {
284 let symbol = self.intern(string);
285 self.gensymed(symbol)
// Records `symbol` in `gensyms` and returns a new tag for it. Tags count down from `!0`
// (`u32::MAX`): the entry just pushed at index `len - 1` receives tag `!0 - (len - 1)`.
288 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
289 self.gensyms.push(symbol);
290 Symbol(!0 - self.gensyms.len() as u32 + 1)
// A symbol counts as a gensym when its tag does not index into `strings`.
// NOTE(review): takes `&mut self` although the visible body only reads.
293 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
294 symbol.0 as usize >= self.strings.len()
// Returns the text of `symbol`. Tags outside `strings` are treated as gensyms and
// resolved through `gensyms`, using the same index arithmetic as `interned`.
297 pub fn get(&self, symbol: Symbol) -> &str {
298 match self.strings.get(symbol.0 as usize) {
299 Some(string) => string,
300 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
305 // In this macro, there is the requirement that the name (the number) must be monotonically
306 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
307 // except starting from the next number instead of zero.
// Each constant is built from `Symbol($index)`, while `Interner::fresh` prefills the
// interner with the strings in declaration order, so the two numberings have to agree
// (presumably `prefill` assigns tags by position; confirm against the full file).
// NOTE(review): many lines of this macro are missing from the excerpt.
308 macro_rules! declare_keywords {(
// One `(index, ConstName, string)` tuple per entry.
309 $( ($index: expr, $konst: ident, $string: expr) )*
312 use super::{Symbol, Ident};
313 #[derive(Clone, Copy, PartialEq, Eq)]
// Accessors for the wrapped identifier and for its name.
318 #[inline] pub fn ident(self) -> Ident { self.ident }
319 #[inline] pub fn name(self) -> Symbol { self.ident.name }
// One constant per entry, wrapping `Symbol($index)` via `Ident::with_empty_ctxt`.
322 #[allow(non_upper_case_globals)]
323 pub const $konst: Keyword = Keyword {
324 ident: Ident::with_empty_ctxt(super::Symbol($index))
// Parsing from text: each `$string` maps to its constant.
328 impl ::std::str::FromStr for Keyword {
331 fn from_str(s: &str) -> Result<Self, ()> {
333 $($string => Ok($konst),)*
// Builds an interner prefilled with every `$string`, in declaration order.
341 pub fn fresh() -> Self {
342 Interner::prefill(&[$($string,)*])
347 // NB: leaving holes in the ident table is bad! a different ident will get
348 // interned with the id from the hole, but it will be between the min and max
349 // of the reserved words, and thus tagged as "reserved".
350 // After modifying this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
351 // accordingly; this should rarely be necessary if the keywords are kept in alphabetical order.
353 // Special reserved identifiers used internally for elided lifetimes,
354 // unnamed method parameters, crate root module, error recovery etc.
356 (1, CrateRoot, "{{root}}")
357 (2, DollarCrate, "$crate")
360 // Keywords used in the language.
365 (8, Continue, "continue")
369 (12, Extern, "extern")
384 (27, Return, "return")
385 (28, SelfValue, "self")
386 (29, SelfType, "Self")
387 (30, Static, "static")
388 (31, Struct, "struct")
393 (36, Unsafe, "unsafe")
398 // Keywords reserved for future use.
399 (40, Abstract, "abstract")
400 (41, Become, "become")
404 (45, Override, "override")
406 (47, Typeof, "typeof")
407 (48, Unsized, "unsized")
408 (49, Virtual, "virtual")
411 // Edition-specific keywords reserved for future use.
412 (51, Async, "async") // >= 2018 Edition only
413 (52, Dyn, "dyn") // >= 2018 Edition only
414 (53, Try, "try") // >= 2018 Edition only
416 // Special lifetime names
417 (54, UnderscoreLifetime, "'_")
418 (55, StaticLifetime, "'static")
420 // Weak keywords, have special meaning only in specific contexts.
423 (58, Default, "default")
425 (60, Existential, "existential")
// True when the symbol lies in the tag range from `Async` to `Try` inclusive, i.e. the
// keyword-list entries annotated as 2018-edition-only.
429 fn is_unused_keyword_2018(self) -> bool {
430 self >= keywords::Async.name() && self <= keywords::Try.name()
435 // Returns true for reserved identifiers used internally for elided lifetimes,
436 // unnamed method parameters, crate root module, error recovery etc.
// Implemented as a range check: every tag up to and including `Underscore`.
437 pub fn is_special(self) -> bool {
438 self.name <= keywords::Underscore.name()
441 /// Returns `true` if the token is a keyword used in the language.
// Range check over the tags from `As` to `While` inclusive.
442 pub fn is_used_keyword(self) -> bool {
443 self.name >= keywords::As.name() && self.name <= keywords::While.name()
446 /// Returns `true` if the token is a keyword reserved for possible future use.
// The range from `Abstract` to `Yield` counts regardless of the span; the 2018-only range
// counts only when `span.rust_2018()` holds, and that check runs last.
447 pub fn is_unused_keyword(self) -> bool {
448 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
449 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
450 self.name.is_unused_keyword_2018() && self.span.rust_2018()
453 /// Returns `true` if the token is either a special identifier or a keyword.
454 pub fn is_reserved(self) -> bool {
455 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
458 /// A keyword or reserved identifier that can be used as a path segment.
459 pub fn is_path_segment_keyword(self) -> bool {
460 self.name == keywords::Super.name() ||
461 self.name == keywords::SelfValue.name() ||
462 self.name == keywords::SelfType.name() ||
463 self.name == keywords::Extern.name() ||
464 self.name == keywords::Crate.name() ||
465 self.name == keywords::CrateRoot.name() ||
466 self.name == keywords::DollarCrate.name()
469 // We see this identifier in a normal identifier position, like variable name or a type.
470 // How was it written originally? Did it use the raw form? Let's try to guess.
// The guess is yes for any reserved name other than `Invalid`, `Underscore` and the
// path-segment keywords.
471 pub fn is_raw_guess(self) -> bool {
472 self.name != keywords::Invalid.name() && self.name != keywords::Underscore.name() &&
473 self.is_reserved() && !self.is_path_segment_keyword()
477 // Runs `f` with mutable access to the symbol interner stored in `GLOBALS`; the lock on
// the interner is held while `f` runs.
479 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
480 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
483 /// Represents a string stored in the interner. Because the interner outlives any thread
484 /// which uses this type, we can safely treat `string` which points to interner data,
485 /// as an immortal string, as long as this type never crosses between threads.
486 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
487 // by creating a new thread right after constructing the interner
488 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
489 pub struct LocalInternedString {
// Points at interner data; `Symbol::as_str` is where the lifetime is extended to static.
490 string: &'static str,
493 impl LocalInternedString {
/// Converts to an `InternedString` by interning the string contents again.
494 pub fn as_interned_str(self) -> InternedString {
496 symbol: Symbol::intern(self.string)
/// Returns a string slice with the static lifetime.
// NOTE(review): body not visible in this excerpt.
500 pub fn get(&self) -> &'static str {
// Provides `AsRef<U>` for every `U` that `str` itself can be borrowed as.
// NOTE(review): the body of `as_ref` is not visible in this excerpt.
505 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
507 str: ::std::convert::AsRef<U>
509 fn as_ref(&self) -> &U {
// Compares the string contents with any value that derefs to `str`.
514 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
515 fn eq(&self, other: &T) -> bool {
516 self.string == other.deref()
// Reverse-direction comparisons, so that `str`, `&str`, `String` and `&String` can appear
// on the left-hand side.
// NOTE(review): the bodies of the `str` and `String` impls are not visible in this excerpt.
520 impl ::std::cmp::PartialEq<LocalInternedString> for str {
521 fn eq(&self, other: &LocalInternedString) -> bool {
526 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
527 fn eq(&self, other: &LocalInternedString) -> bool {
528 *self == other.string
532 impl ::std::cmp::PartialEq<LocalInternedString> for String {
533 fn eq(&self, other: &LocalInternedString) -> bool {
538 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
539 fn eq(&self, other: &LocalInternedString) -> bool {
540 *self == other.string
// Opt out of `Send` and `Sync`: the documentation on the type requires that it never
// crosses between threads.
544 impl !Send for LocalInternedString {}
545 impl !Sync for LocalInternedString {}
// Derefs to the underlying `str`.
547 impl ::std::ops::Deref for LocalInternedString {
549 fn deref(&self) -> &str { self.string }
// `Debug` and `Display` both forward to the corresponding impl of `str`.
552 impl fmt::Debug for LocalInternedString {
553 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
554 fmt::Debug::fmt(self.string, f)
558 impl fmt::Display for LocalInternedString {
559 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
560 fmt::Display::fmt(self.string, f)
// Decodes by interning the string that was read and taking `as_str` of the new symbol.
564 impl Decodable for LocalInternedString {
565 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
566 Ok(Symbol::intern(&d.read_str()?).as_str())
// Encodes the string contents.
570 impl Encodable for LocalInternedString {
571 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
572 s.emit_str(self.string)
576 /// Represents a string stored in the string interner
// NOTE(review): the field list is missing from this excerpt; other code in the file
// constructs and reads a `symbol` field.
577 #[derive(Clone, Copy, Eq)]
578 pub struct InternedString {
582 impl InternedString {
/// Gives `f` access to the string and returns the result of `f`.
// Only a raw pointer is taken while the interner is borrowed; `f` is not passed into
// `with_interner`.
// NOTE(review): the line that dereferences the pointer and calls `f` is not visible here.
583 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
584 let str = with_interner(|interner| {
585 interner.get(self.symbol) as *const str
587 // This is safe because the interner keeps string alive until it is dropped.
588 // We can access it because we know the interner is still alive since we use a
589 // scoped thread local to access it, and it was alive at the beginning of this scope
/// Returns a `Symbol` for this string.
// NOTE(review): body not visible in this excerpt.
593 pub fn as_symbol(self) -> Symbol {
/// Returns a `LocalInternedString` for this string.
// NOTE(review): body not visible in this excerpt.
597 pub fn as_str(self) -> LocalInternedString {
// Hashes the string contents rather than the numeric tag of the symbol.
602 impl Hash for InternedString {
603 fn hash<H: Hasher>(&self, state: &mut H) {
604 self.with(|str| str.hash(state))
// Ordering follows the string contents. Identical symbols short-circuit to `Equal`
// without looking at the strings.
608 impl PartialOrd<InternedString> for InternedString {
609 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
610 if self.symbol == other.symbol {
611 return Some(Ordering::Equal);
613 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
// Same scheme as `partial_cmp`: symbol equality first, string comparison otherwise.
617 impl Ord for InternedString {
618 fn cmp(&self, other: &InternedString) -> Ordering {
619 if self.symbol == other.symbol {
620 return Ordering::Equal;
622 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
// Compares the string contents with any value that derefs to `str`.
626 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
627 fn eq(&self, other: &T) -> bool {
628 self.with(|string| string == other.deref())
// Two `InternedString`s are compared by symbol only; the strings are not consulted.
632 impl PartialEq<InternedString> for InternedString {
633 fn eq(&self, other: &InternedString) -> bool {
634 self.symbol == other.symbol
// Reverse-direction comparisons, so that `str`, `&str`, `String` and `&String` can appear
// on the left-hand side; each compares against the string contents.
638 impl PartialEq<InternedString> for str {
639 fn eq(&self, other: &InternedString) -> bool {
640 other.with(|string| self == string)
644 impl<'a> PartialEq<InternedString> for &'a str {
645 fn eq(&self, other: &InternedString) -> bool {
646 other.with(|string| *self == string)
650 impl PartialEq<InternedString> for String {
651 fn eq(&self, other: &InternedString) -> bool {
652 other.with(|string| self == string)
656 impl<'a> PartialEq<InternedString> for &'a String {
657 fn eq(&self, other: &InternedString) -> bool {
658 other.with(|string| *self == string)
// Converts to an owned `String` via `to_string` on the symbol.
662 impl ::std::convert::From<InternedString> for String {
663 fn from(val: InternedString) -> String {
664 val.as_symbol().to_string()
// `Debug` and `Display` both format the string contents.
668 impl fmt::Debug for InternedString {
669 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
670 self.with(|str| fmt::Debug::fmt(&str, f))
674 impl fmt::Display for InternedString {
675 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
676 self.with(|str| fmt::Display::fmt(&str, f))
// Decodes by interning the string that was read.
680 impl Decodable for InternedString {
681 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
682 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
// Encodes the string contents.
686 impl Encodable for InternedString {
687 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
688 self.with(|string| s.emit_str(string))
// Exercises a default (empty) interner directly: interned strings are numbered upwards
// from zero and repeated interning returns the same symbol.
698 fn interner_tests() {
699 let mut i: Interner = Interner::default();
700 // first one is zero:
701 assert_eq!(i.intern("dog"), Symbol(0));
702 // re-use gets the same entry:
703 assert_eq!(i.intern("dog"), Symbol(0));
704 // different string gets a different #:
705 assert_eq!(i.intern("cat"), Symbol(1));
706 assert_eq!(i.intern("cat"), Symbol(1));
707 // dog is still at zero
708 assert_eq!(i.intern("dog"), Symbol(0));
// 4294967295 is `u32::MAX`: gensym tags count down from the top of the `u32` range.
709 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
710 // gensym of same string gets new number:
711 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
712 // gensym of *existing* string gets new number:
713 assert_eq!(i.gensym("dog"), Symbol(4294967293));
// Stripping the leading quote from the lifetime-like name must yield the symbol of the
// `break` keyword.
717 fn without_first_quote_test() {
718 GLOBALS.set(&Globals::new(), || {
719 let i = Ident::from_str("'break");
720 assert_eq!(i.without_first_quote().name, keywords::Break.name());