1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with `u32` tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 use rustc_data_structures::fx::FxHashMap;
19 use arena::DroplessArena;
20 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
24 use std::hash::{Hash, Hasher};
26 #[derive(Copy, Clone, Eq)]
34 pub const fn new(name: Symbol, span: Span) -> Ident {
38 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
39 Ident::new(name, DUMMY_SP)
42 /// Maps an interned string to an identifier with an empty syntax context.
43 pub fn from_interned_str(string: InternedString) -> Ident {
44 Ident::with_empty_ctxt(string.as_symbol())
47 /// Maps a string to an identifier with an empty syntax context.
48 pub fn from_str(string: &str) -> Ident {
49 Ident::with_empty_ctxt(Symbol::intern(string))
52 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
53 pub fn with_span_pos(self, span: Span) -> Ident {
54 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
57 pub fn without_first_quote(self) -> Ident {
58 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
61 /// "Normalize" ident for use in comparisons using "item hygiene".
62 /// Identifiers with the same string value become equal if they came from the same "modern"
63 /// macro (e.g. a `macro` item, but not a `macro_rules` item) and stay different if they came
64 /// from different "modern" macros.
65 /// Technically, this operation strips all non-opaque marks from the ident's syntactic context.
66 pub fn modern(self) -> Ident {
67 Ident::new(self.name, self.span.modern())
70 /// "Normalize" ident for use in comparisons using "local variable hygiene".
71 /// Identifiers with the same string value become equal if they came from the same
72 /// non-transparent macro (e.g. `macro` or `macro_rules!` items) and stay different if they
73 /// came from different non-transparent macros.
74 /// Technically, this operation strips all transparent marks from the ident's syntactic context.
75 pub fn modern_and_legacy(self) -> Ident {
76 Ident::new(self.name, self.span.modern_and_legacy())
79 pub fn gensym(self) -> Ident {
80 Ident::new(self.name.gensymed(), self.span)
83 pub fn as_str(self) -> LocalInternedString {
87 pub fn as_interned_str(self) -> InternedString {
88 self.name.as_interned_str()
92 impl PartialEq for Ident {
93 fn eq(&self, rhs: &Self) -> bool {
94 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
99 fn hash<H: Hasher>(&self, state: &mut H) {
100 self.name.hash(state);
101 self.span.ctxt().hash(state);
105 impl fmt::Debug for Ident {
106 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
107 write!(f, "{}{:?}", self.name, self.span.ctxt())
111 impl fmt::Display for Ident {
112 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113 fmt::Display::fmt(&self.name, f)
117 impl Encodable for Ident {
118 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
119 if self.span.ctxt().modern() == SyntaxContext::empty() {
120 s.emit_str(&self.as_str())
121 } else { // FIXME(jseyfried) intercrate hygiene
122 let mut string = "#".to_owned();
123 string.push_str(&self.as_str());
129 impl Decodable for Ident {
130 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
131 let string = d.read_str()?;
132 Ok(if !string.starts_with('#') {
133 Ident::from_str(&string)
134 } else { // FIXME(jseyfried) intercrate hygiene
135 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
140 /// A symbol is an interned or gensymed string.
141 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
142 pub struct Symbol(u32);
144 // The interner is pointed to by a thread-local value which is only set on the main thread
145 // when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
146 // to avoid panics and other errors, even though it would be memory safe to do so.
147 #[cfg(not(parallel_queries))]
148 impl !Send for Symbol { }
149 #[cfg(not(parallel_queries))]
150 impl !Sync for Symbol { }
153 /// Maps a string to its interned representation.
154 pub fn intern(string: &str) -> Self {
155 with_interner(|interner| interner.intern(string))
158 pub fn interned(self) -> Self {
159 with_interner(|interner| interner.interned(self))
162 /// Creates a fresh gensym for `string`, using the current interner.
163 pub fn gensym(string: &str) -> Self {
164 with_interner(|interner| interner.gensym(string))
167 pub fn gensymed(self) -> Self {
168 with_interner(|interner| interner.gensymed(self))
171 pub fn as_str(self) -> LocalInternedString {
172 with_interner(|interner| unsafe {
173 LocalInternedString {
174 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
179 pub fn as_interned_str(self) -> InternedString {
180 with_interner(|interner| InternedString {
181 symbol: interner.interned(self)
185 pub fn as_u32(self) -> u32 {
190 impl fmt::Debug for Symbol {
191 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
192 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
194 write!(f, "{}({})", self, self.0)
196 write!(f, "{}", self)
201 impl fmt::Display for Symbol {
202 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
203 fmt::Display::fmt(&self.as_str(), f)
207 impl Encodable for Symbol {
208 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
209 s.emit_str(&self.as_str())
213 impl Decodable for Symbol {
214 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
215 Ok(Symbol::intern(&d.read_str()?))
219 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
220 fn eq(&self, other: &T) -> bool {
221 self.as_str() == other.deref()
225 // The &'static strs in this type actually point into the arena
227 pub struct Interner {
228 arena: DroplessArena,
229 names: FxHashMap<&'static str, Symbol>,
230 strings: Vec<&'static str>,
231 gensyms: Vec<Symbol>,
235 fn prefill(init: &[&str]) -> Self {
236 let mut this = Interner::default();
237 for &string in init {
239 // We can't allocate empty strings in the arena, so handle this here
240 let name = Symbol(this.strings.len() as u32);
241 this.names.insert("", name);
242 this.strings.push("");
250 pub fn intern(&mut self, string: &str) -> Symbol {
251 if let Some(&name) = self.names.get(string) {
255 let name = Symbol(self.strings.len() as u32);
257 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
258 let string: &str = unsafe {
259 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
261 // It is safe to extend the arena allocation to 'static because we only access
262 // these while the arena is still alive
263 let string: &'static str = unsafe {
264 &*(string as *const str)
266 self.strings.push(string);
267 self.names.insert(string, name);
271 pub fn interned(&self, symbol: Symbol) -> Symbol {
272 if (symbol.0 as usize) < self.strings.len() {
275 self.interned(self.gensyms[(!0 - symbol.0) as usize])
279 fn gensym(&mut self, string: &str) -> Symbol {
280 let symbol = self.intern(string);
281 self.gensymed(symbol)
284 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
285 self.gensyms.push(symbol);
286 Symbol(!0 - self.gensyms.len() as u32 + 1)
289 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
290 symbol.0 as usize >= self.strings.len()
293 pub fn get(&self, symbol: Symbol) -> &str {
294 match self.strings.get(symbol.0 as usize) {
295 Some(string) => string,
296 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
301 // In this macro, there is the requirement that the name (the number) must be monotonically
302 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
303 // except starting from the next number instead of zero.
304 macro_rules! declare_keywords {(
305 $( ($index: expr, $konst: ident, $string: expr) )*
308 use super::{Symbol, Ident};
309 #[derive(Clone, Copy, PartialEq, Eq)]
314 #[inline] pub fn ident(self) -> Ident { self.ident }
315 #[inline] pub fn name(self) -> Symbol { self.ident.name }
318 #[allow(non_upper_case_globals)]
319 pub const $konst: Keyword = Keyword {
320 ident: Ident::with_empty_ctxt(super::Symbol($index))
324 impl ::std::str::FromStr for Keyword {
327 fn from_str(s: &str) -> Result<Self, ()> {
329 $($string => Ok($konst),)*
337 pub fn fresh() -> Self {
338 Interner::prefill(&[$($string,)*])
343 // NB: leaving holes in the ident table is bad! A different ident will get
344 // interned with the id from the hole, but it will be between the min and max
345 // of the reserved words, and thus tagged as "reserved".
346 // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
347 // this should rarely be necessary, though, if the keywords are kept in alphabetical order.
349 // Special reserved identifiers used internally for elided lifetimes,
350 // unnamed method parameters, crate root module, error recovery etc.
352 (1, CrateRoot, "{{root}}")
353 (2, DollarCrate, "$crate")
356 // Keywords used in the language.
361 (8, Continue, "continue")
365 (12, Extern, "extern")
380 (27, Return, "return")
381 (28, SelfValue, "self")
382 (29, SelfType, "Self")
383 (30, Static, "static")
384 (31, Struct, "struct")
389 (36, Unsafe, "unsafe")
394 // Keywords reserved for future use.
395 (40, Abstract, "abstract")
396 (41, Become, "become")
400 (45, Override, "override")
402 (47, Typeof, "typeof")
403 (48, Unsized, "unsized")
404 (49, Virtual, "virtual")
407 // Edition-specific keywords reserved for future use.
408 (51, Async, "async") // >= 2018 Edition only
409 (52, Dyn, "dyn") // >= 2018 Edition only
410 (53, Try, "try") // >= 2018 Edition only
412 // Special lifetime names
413 (54, UnderscoreLifetime, "'_")
414 (55, StaticLifetime, "'static")
416 // Weak keywords, have special meaning only in specific contexts.
419 (58, Default, "default")
421 (60, Existential, "existential")
425 fn is_unused_keyword_2018(self) -> bool {
426 self >= keywords::Async.name() && self <= keywords::Try.name()
431 // Returns true for reserved identifiers used internally for elided lifetimes,
432 // unnamed method parameters, crate root module, error recovery etc.
433 pub fn is_special(self) -> bool {
434 self.name <= keywords::Underscore.name()
437 /// Returns `true` if the token is a keyword used in the language.
438 pub fn is_used_keyword(self) -> bool {
439 self.name >= keywords::As.name() && self.name <= keywords::While.name()
442 /// Returns `true` if the token is a keyword reserved for possible future use.
443 pub fn is_unused_keyword(self) -> bool {
444 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
445 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
446 self.name.is_unused_keyword_2018() && self.span.rust_2018()
449 /// Returns `true` if the token is either a special identifier or a keyword.
450 pub fn is_reserved(self) -> bool {
451 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
454 /// A keyword or reserved identifier that can be used as a path segment.
455 pub fn is_path_segment_keyword(self) -> bool {
456 self.name == keywords::Super.name() ||
457 self.name == keywords::SelfValue.name() ||
458 self.name == keywords::SelfType.name() ||
459 self.name == keywords::Extern.name() ||
460 self.name == keywords::Crate.name() ||
461 self.name == keywords::CrateRoot.name() ||
462 self.name == keywords::DollarCrate.name()
465 // We see this identifier in a normal identifier position, like a variable name or a type.
466 // How was it written originally? Did it use the raw form? Let's try to guess.
467 pub fn is_raw_guess(self) -> bool {
468 self.name != keywords::Invalid.name() &&
469 self.is_reserved() && !self.is_path_segment_keyword()
473 // Runs `f` with mutable access to the symbol interner stored in `GLOBALS`.
475 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
476 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
479 /// Represents a string stored in the interner. Because the interner outlives any thread
480 /// which uses this type, we can safely treat `string`, which points to interner data,
481 /// as an immortal string, as long as this type never crosses between threads.
482 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
483 // by creating a new thread right after constructing the interner
484 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
485 pub struct LocalInternedString {
486 string: &'static str,
489 impl LocalInternedString {
490 pub fn as_interned_str(self) -> InternedString {
492 symbol: Symbol::intern(self.string)
496 pub fn get(&self) -> &'static str {
501 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
503 str: ::std::convert::AsRef<U>
505 fn as_ref(&self) -> &U {
510 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
511 fn eq(&self, other: &T) -> bool {
512 self.string == other.deref()
516 impl ::std::cmp::PartialEq<LocalInternedString> for str {
517 fn eq(&self, other: &LocalInternedString) -> bool {
522 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
523 fn eq(&self, other: &LocalInternedString) -> bool {
524 *self == other.string
528 impl ::std::cmp::PartialEq<LocalInternedString> for String {
529 fn eq(&self, other: &LocalInternedString) -> bool {
534 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
535 fn eq(&self, other: &LocalInternedString) -> bool {
536 *self == other.string
540 impl !Send for LocalInternedString {}
541 impl !Sync for LocalInternedString {}
543 impl ::std::ops::Deref for LocalInternedString {
545 fn deref(&self) -> &str { self.string }
548 impl fmt::Debug for LocalInternedString {
549 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
550 fmt::Debug::fmt(self.string, f)
554 impl fmt::Display for LocalInternedString {
555 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
556 fmt::Display::fmt(self.string, f)
560 impl Decodable for LocalInternedString {
561 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
562 Ok(Symbol::intern(&d.read_str()?).as_str())
566 impl Encodable for LocalInternedString {
567 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
568 s.emit_str(self.string)
572 /// Represents a string stored in the string interner
573 #[derive(Clone, Copy, Eq)]
574 pub struct InternedString {
578 impl InternedString {
579 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
580 let str = with_interner(|interner| {
581 interner.get(self.symbol) as *const str
583 // This is safe because the interner keeps the string alive until the interner itself is dropped.
584 // We can access it because we know the interner is still alive since we use a
585 // scoped thread local to access it, and it was alive at the beginning of this scope
589 pub fn as_symbol(self) -> Symbol {
593 pub fn as_str(self) -> LocalInternedString {
598 impl Hash for InternedString {
599 fn hash<H: Hasher>(&self, state: &mut H) {
600 self.with(|str| str.hash(state))
604 impl PartialOrd<InternedString> for InternedString {
605 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
606 if self.symbol == other.symbol {
607 return Some(Ordering::Equal);
609 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
613 impl Ord for InternedString {
614 fn cmp(&self, other: &InternedString) -> Ordering {
615 if self.symbol == other.symbol {
616 return Ordering::Equal;
618 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
622 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
623 fn eq(&self, other: &T) -> bool {
624 self.with(|string| string == other.deref())
628 impl PartialEq<InternedString> for InternedString {
629 fn eq(&self, other: &InternedString) -> bool {
630 self.symbol == other.symbol
634 impl PartialEq<InternedString> for str {
635 fn eq(&self, other: &InternedString) -> bool {
636 other.with(|string| self == string)
640 impl<'a> PartialEq<InternedString> for &'a str {
641 fn eq(&self, other: &InternedString) -> bool {
642 other.with(|string| *self == string)
646 impl PartialEq<InternedString> for String {
647 fn eq(&self, other: &InternedString) -> bool {
648 other.with(|string| self == string)
652 impl<'a> PartialEq<InternedString> for &'a String {
653 fn eq(&self, other: &InternedString) -> bool {
654 other.with(|string| *self == string)
658 impl ::std::convert::From<InternedString> for String {
659 fn from(val: InternedString) -> String {
660 val.as_symbol().to_string()
664 impl fmt::Debug for InternedString {
665 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
666 self.with(|str| fmt::Debug::fmt(&str, f))
670 impl fmt::Display for InternedString {
671 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
672 self.with(|str| fmt::Display::fmt(&str, f))
676 impl Decodable for InternedString {
677 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
678 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
682 impl Encodable for InternedString {
683 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
684 self.with(|string| s.emit_str(string))
694 fn interner_tests() {
695 let mut i: Interner = Interner::default();
696 // first one is zero:
697 assert_eq!(i.intern("dog"), Symbol(0));
698 // re-use gets the same entry:
699 assert_eq!(i.intern("dog"), Symbol(0));
700 // different string gets a different #:
701 assert_eq!(i.intern("cat"), Symbol(1));
702 assert_eq!(i.intern("cat"), Symbol(1));
703 // dog is still at zero
704 assert_eq!(i.intern("dog"), Symbol(0));
705 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
707 // gensym of same string gets new number:
707 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
708 // gensym of *existing* string gets new number:
709 assert_eq!(i.gensym("dog"), Symbol(4294967293));
713 fn without_first_quote_test() {
714 GLOBALS.set(&Globals::new(), || {
715 let i = Ident::from_str("'break");
716 assert_eq!(i.without_first_quote().name, keywords::Break.name());