1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with usize tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
16 use hygiene::SyntaxContext;
17 use {Span, DUMMY_SP, GLOBALS};
19 use rustc_data_structures::fx::FxHashMap;
20 use arena::DroplessArena;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
24 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
25 use std::hash::{Hash, Hasher};
27 #[derive(Copy, Clone, Eq)]
/// Constructs an `Ident` from a name and a span.
35 pub const fn new(name: Symbol, span: Span) -> Ident {
/// Constructs an `Ident` for `name` whose span is `DUMMY_SP`.
39 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
40 Ident::new(name, DUMMY_SP)
43 /// Maps an interned string to an identifier with an empty syntax context.
44 pub fn from_interned_str(string: InternedString) -> Ident {
45 Ident::with_empty_ctxt(string.as_symbol())
48 /// Maps a string to an identifier with an empty syntax context.
/// The string is interned through `Symbol::intern` first.
49 pub fn from_str(string: &str) -> Ident {
50 Ident::with_empty_ctxt(Symbol::intern(string))
53 /// Replaces `lo` and `hi` with those from `span`, but keeps the hygiene context.
54 pub fn with_span_pos(self, span: Span) -> Ident {
// The incoming span is re-tagged with this identifier's current syntax context.
55 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
/// Returns an identifier with the same span whose name is this identifier's text
/// with all leading `'` characters trimmed off (the trimmed text is interned anew).
58 pub fn without_first_quote(self) -> Ident {
59 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
62 /// "Normalize" ident for use in comparisons using "item hygiene".
63 /// Identifiers with same string value become same if they came from the same "modern" macro
64 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
65 /// different "modern" macros.
66 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
67 pub fn modern(self) -> Ident {
68 Ident::new(self.name, self.span.modern())
71 /// "Normalize" ident for use in comparisons using "local variable hygiene".
72 /// Identifiers with same string value become same if they came from the same non-transparent
73 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
74 /// non-transparent macros.
75 /// Technically, this operation strips all transparent marks from ident's syntactic context.
76 pub fn modern_and_legacy(self) -> Ident {
77 Ident::new(self.name, self.span.modern_and_legacy())
/// Returns an identifier with the same span whose name is `self.name.gensymed()`.
80 pub fn gensym(self) -> Ident {
81 Ident::new(self.name.gensymed(), self.span)
/// Returns this identifier's text as a `LocalInternedString`.
// NOTE(review): the body is not visible in this listing; presumably it forwards to the
// name's `as_str` — confirm.
84 pub fn as_str(self) -> LocalInternedString {
/// Returns the `InternedString` form of this identifier's name.
88 pub fn as_interned_str(self) -> InternedString {
89 self.name.as_interned_str()
// Two identifiers are equal when their names are equal and the syntax contexts of their
// spans are equal; the rest of the span does not take part in the comparison.
93 impl PartialEq for Ident {
94 fn eq(&self, rhs: &Self) -> bool {
95 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
// Hashes the name and the span's syntax context, i.e. the same two components that `eq`
// compares, so equal identifiers hash equally.
100 fn hash<H: Hasher>(&self, state: &mut H) {
101 self.name.hash(state);
102 self.span.ctxt().hash(state);
// Debug output is the name (formatted with `{}`) immediately followed by the `Debug`
// form of the span's syntax context.
106 impl fmt::Debug for Ident {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 write!(f, "{}{:?}", self.name, self.span.ctxt())
// Display output is exactly the name's `Display` output; the span is not printed.
112 impl fmt::Display for Ident {
113 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114 fmt::Display::fmt(&self.name, f)
// Encodes an identifier as a string. When the modernized syntax context is empty the
// plain text is emitted; otherwise a copy of the text prefixed with `#` is built.
// NOTE(review): the line that emits the prefixed string is not visible in this listing.
118 impl Encodable for Ident {
119 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
120 if self.span.ctxt().modern() == SyntaxContext::empty() {
121 s.emit_str(&self.as_str())
122 } else { // FIXME(jseyfried) intercrate hygiene
123 let mut string = "#".to_owned();
124 string.push_str(&self.as_str());
// Decodes the string form written by `encode`. A string without a leading `#` is turned
// into an identifier with `Ident::from_str`; with a leading `#`, the remainder after the
// `#` is gensymed and wrapped with `Ident::with_empty_ctxt`.
130 impl Decodable for Ident {
131 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
132 let string = d.read_str()?;
133 Ok(if !string.starts_with('#') {
134 Ident::from_str(&string)
135 } else { // FIXME(jseyfried) intercrate hygiene
// `&string[1..]` skips the one-byte `#` marker.
136 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
141 /// A symbol is an interned or gensymed string.
///
/// The wrapped `u32` is used by `Interner` as an index into its string table or,
/// counting down from `!0`, into its gensym table.
142 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
143 pub struct Symbol(u32);
145 // The interner is pointed to by a thread local value which is only set on the main thread
146 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
147 // to avoid panics and other errors, even though it would be memory safe to do so.
148 #[cfg(not(parallel_queries))]
149 impl !Send for Symbol { }
150 #[cfg(not(parallel_queries))]
151 impl !Sync for Symbol { }
154 /// Maps a string to its interned representation.
155 pub fn intern(string: &str) -> Self {
156 with_interner(|interner| interner.intern(string))
/// Resolves this symbol through `Interner::interned`, which follows gensyms back to the
/// symbol they were created from.
159 pub fn interned(self) -> Self {
160 with_interner(|interner| interner.interned(self))
163 /// Gensyms a new symbol for `string`, using the current interner.
164 pub fn gensym(string: &str) -> Self {
165 with_interner(|interner| interner.gensym(string))
/// Creates a new gensym that refers back to this symbol, using the current interner.
168 pub fn gensymed(self) -> Self {
169 with_interner(|interner| interner.gensymed(self))
/// Returns this symbol's text as a `LocalInternedString`.
172 pub fn as_str(self) -> LocalInternedString {
173 with_interner(|interner| unsafe {
// The transmute is `&str` to `&str`: only the lifetime changes, so the borrow returned by
// `interner.get` can be stored in the `&'static str` field of `LocalInternedString`.
174 LocalInternedString {
175 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
/// Returns an `InternedString` holding the resolved (non-gensym) symbol for this symbol.
180 pub fn as_interned_str(self) -> InternedString {
181 with_interner(|interner| InternedString {
182 symbol: interner.interned(self)
/// Returns this symbol as a `u32`.
// NOTE(review): body not visible in this listing; presumably the wrapped value — confirm.
186 pub fn as_u32(self) -> u32 {
// Debug output has two forms: `text(raw_u32)` and plain `text`.
// NOTE(review): the branch lines are not visible in this listing; presumably the first
// form is used when `is_gensymed` is true — confirm.
191 impl fmt::Debug for Symbol {
192 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
195 write!(f, "{}({})", self, self.0)
197 write!(f, "{}", self)
// Display output is the symbol's text, obtained through `as_str`.
202 impl fmt::Display for Symbol {
203 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
204 fmt::Display::fmt(&self.as_str(), f)
// A symbol is encoded as its text.
208 impl Encodable for Symbol {
209 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
210 s.emit_str(&self.as_str())
// Decoding interns the text that was read, so the result is always an ordinary interned
// symbol, even if the encoded symbol was a gensym.
214 impl Decodable for Symbol {
215 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
216 Ok(Symbol::intern(&d.read_str()?))
// Compares the symbol's text with anything that dereferences to `str`.
220 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
221 fn eq(&self, other: &T) -> bool {
222 self.as_str() == other.deref()
226 // The &'static strs in this type actually point into the arena
227 pub struct Interner {
// Backing storage into which the bytes of interned strings are copied.
228 arena: DroplessArena,
// String -> symbol lookup.
229 names: FxHashMap<&'static str, Symbol>,
// Symbol index -> string lookup; an interned symbol's `u32` is its position here.
230 strings: Vec<&'static str>,
// For each gensym, in creation order, the symbol it was created from.
231 gensyms: Vec<Symbol>,
/// Creates an interner with a new arena and empty tables.
235 pub fn new() -> Self {
237 arena: DroplessArena::new(),
238 names: Default::default(),
239 strings: Default::default(),
240 gensyms: Default::default(),
/// Creates an interner and registers the strings in `init`, in order.
// NOTE(review): only the empty-string branch of the loop is visible in this listing;
// presumably non-empty strings go through `intern` — confirm.
244 fn prefill(init: &[&str]) -> Self {
245 let mut this = Interner::new();
246 for &string in init {
248 // We can't allocate empty strings in the arena, so handle this here
// The empty string is registered directly in both tables with the next free index.
249 let name = Symbol(this.strings.len() as u32);
250 this.names.insert("", name);
251 this.strings.push("");
/// Returns the symbol for `string`, allocating a new one if the string is not yet known.
259 pub fn intern(&mut self, string: &str) -> Symbol {
// Look for an existing entry first.
260 if let Some(&name) = self.names.get(string) {
// A new symbol's number is the index its string is about to occupy in `strings`.
264 let name = Symbol(self.strings.len() as u32);
266 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
267 let string: &str = unsafe {
268 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
270 // It is safe to extend the arena allocation to 'static because we only access
271 // these while the arena is still alive
272 let string: &'static str = unsafe {
273 &*(string as *const str)
// Record the arena copy in both directions.
275 self.strings.push(string);
276 self.names.insert(string, name);
/// Resolves `symbol` to an interned (non-gensym) symbol.
280 pub fn interned(&self, symbol: Symbol) -> Symbol {
// Values below `strings.len()` are indices into `strings`.
281 if (symbol.0 as usize) < self.strings.len() {
// Otherwise the value is a gensym: `!0 - symbol.0` is its index in `gensyms`, and the
// stored symbol is resolved recursively because it may itself be a gensym.
284 self.interned(self.gensyms[(!0 - symbol.0) as usize])
/// Interns `string` and returns a fresh gensym referring to it.
288 fn gensym(&mut self, string: &str) -> Symbol {
289 let symbol = self.intern(string);
290 self.gensymed(symbol)
/// Records a new gensym for `symbol` and returns it.
293 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
294 self.gensyms.push(symbol);
// Gensym numbers count down from `!0`: after the push, index `len - 1` maps to
// `!0 - (len - 1)`, which is the inverse of the `!0 - symbol.0` lookup in `interned`/`get`.
295 Symbol(!0 - self.gensyms.len() as u32 + 1)
/// Returns `true` if `symbol` is not an index into `strings`, i.e. it is a gensym.
// NOTE(review): takes `&mut self` although the visible body only reads `strings.len()`.
298 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
299 symbol.0 as usize >= self.strings.len()
/// Returns the text for `symbol`, following gensyms back to the string they refer to.
302 pub fn get(&self, symbol: Symbol) -> &str {
303 match self.strings.get(symbol.0 as usize) {
304 Some(string) => string,
// Out of range for `strings`: treat the value as a gensym and resolve its target.
305 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
310 // In this macro, there is the requirement that the name (the number) must be monotonically
311 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
312 // except starting from the next number instead of zero.
// Input: a sequence of `(index, ConstName, "string")` entries.
313 macro_rules! declare_keywords {(
314 $( ($index: expr, $konst: ident, $string: expr) )*
317 use super::{Symbol, Ident};
318 #[derive(Clone, Copy, PartialEq, Eq)]
// Accessors for the identifier stored in a `Keyword` and for that identifier's name.
323 #[inline] pub fn ident(self) -> Ident { self.ident }
324 #[inline] pub fn name(self) -> Symbol { self.ident.name }
// One constant per entry, wrapping `Symbol($index)` in an identifier built with
// `Ident::with_empty_ctxt`.
327 #[allow(non_upper_case_globals)]
328 pub const $konst: Keyword = Keyword {
329 ident: Ident::with_empty_ctxt(super::Symbol($index))
// Parsing maps each entry's string to the corresponding constant.
333 impl ::std::str::FromStr for Keyword {
336 fn from_str(s: &str) -> Result<Self, ()> {
338 $($string => Ok($konst),)*
// `fresh` prefills an interner with every entry's string, in declaration order.
346 pub fn fresh() -> Self {
347 Interner::prefill(&[$($string,)*])
352 // NB: leaving holes in the ident table is bad! a different ident will get
353 // interned with the id from the hole, but it will be between the min and max
354 // of the reserved words, and thus tagged as "reserved".
355 // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`;
356 // this should rarely be necessary, though, if the keywords are kept in alphabetic order.
// Each entry is `(index, ConstName, "string")`, matching the `declare_keywords!` pattern.
358 // Special reserved identifiers used internally for elided lifetimes,
359 // unnamed method parameters, crate root module, error recovery etc.
361 (1, CrateRoot, "{{root}}")
362 (2, DollarCrate, "$crate")
365 // Keywords used in the language.
370 (8, Continue, "continue")
374 (12, Extern, "extern")
389 (27, Return, "return")
390 (28, SelfValue, "self")
391 (29, SelfType, "Self")
392 (30, Static, "static")
393 (31, Struct, "struct")
398 (36, Unsafe, "unsafe")
403 // Keywords reserved for future use.
404 (40, Abstract, "abstract")
405 (41, Become, "become")
409 (45, Override, "override")
411 (47, Typeof, "typeof")
412 (48, Unsized, "unsized")
413 (49, Virtual, "virtual")
416 // Edition-specific keywords reserved for future use.
417 (51, Async, "async") // >= 2018 Edition Only
419 // Special lifetime names
420 (52, UnderscoreLifetime, "'_")
421 (53, StaticLifetime, "'static")
423 // Weak keywords, have special meaning only in specific contexts.
426 (56, Default, "default")
429 (59, Existential, "existential")
/// Returns `true` if this symbol is the `async` keyword, which the keyword table marks
/// as reserved only from the 2018 edition on.
433 fn is_unused_keyword_2018(self) -> bool {
434 self == keywords::Async.name()
439 // Returns true for reserved identifiers used internally for elided lifetimes,
440 // unnamed method parameters, crate root module, error recovery etc.
// Implemented as a range check: every symbol up to and including `Underscore` counts.
441 pub fn is_special(self) -> bool {
442 self.name <= keywords::Underscore.name()
445 /// Returns `true` if the token is a keyword used in the language.
// Range check over the symbols from `As` through `While`, inclusive.
446 pub fn is_used_keyword(self) -> bool {
447 self.name >= keywords::As.name() && self.name <= keywords::While.name()
450 /// Returns `true` if the token is a keyword reserved for possible future use.
451 pub fn is_unused_keyword(self) -> bool {
452 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
// `&&` binds tighter than `||`: either the name lies in `Abstract..=Yield`, or it is the
// 2018-only keyword and the span's edition is 2018. Short-circuiting means the edition is
// only queried when the name check on the last line succeeds.
453 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
454 self.name.is_unused_keyword_2018() && self.span.edition() == Edition::Edition2018
457 /// Returns `true` if the token is either a special identifier or a keyword.
458 pub fn is_reserved(self) -> bool {
459 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
462 /// A keyword or reserved identifier that can be used as a path segment.
463 pub fn is_path_segment_keyword(self) -> bool {
464 self.name == keywords::Super.name() ||
465 self.name == keywords::SelfValue.name() ||
466 self.name == keywords::SelfType.name() ||
467 self.name == keywords::Extern.name() ||
468 self.name == keywords::Crate.name() ||
469 self.name == keywords::CrateRoot.name() ||
470 self.name == keywords::DollarCrate.name()
473 // We see this identifier in a normal identifier position, like variable name or a type.
474 // How was it written originally? Did it use the raw form? Let's try to guess.
// The guess is "raw" when the name is not `Invalid`, is reserved, and is not one of the
// keywords that may appear as a path segment.
475 pub fn is_raw_guess(self) -> bool {
476 self.name != keywords::Invalid.name() &&
477 self.is_reserved() && !self.is_path_segment_keyword()
481 // Runs `f` with mutable access to the symbol interner held in `GLOBALS`
// (obtained through `globals.symbol_interner.lock()`) and returns `f`'s result.
// NOTE(review): this comment used to say that a fresh interner is prepared when none
// exists; nothing in the visible body does that — confirm against the definition of `GLOBALS`.
483 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
484 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
487 /// Represents a string stored in the interner. Because the interner outlives any thread
488 /// which uses this type, we can safely treat `string`, which points to interner data,
489 /// as an immortal string, as long as this type never crosses between threads.
490 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
491 // by creating a new thread right after constructing the interner
492 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
493 pub struct LocalInternedString {
// Borrow of the interner's copy of the text, stored with a `'static` lifetime.
494 string: &'static str,
497 impl LocalInternedString {
/// Converts to an `InternedString` by interning the text again with `Symbol::intern`.
498 pub fn as_interned_str(self) -> InternedString {
500 symbol: Symbol::intern(self.string)
// `AsRef<U>` is provided for every `U` for which `str: AsRef<U>` holds.
// NOTE(review): the body is not visible in this listing; presumably it forwards to the
// wrapped `str` — confirm.
505 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
507 str: ::std::convert::AsRef<U>
509 fn as_ref(&self) -> &U {
// Comparison with anything that dereferences to `str`, by text.
514 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
515 fn eq(&self, other: &T) -> bool {
516 self.string == other.deref()
// The impls below put `LocalInternedString` on the right-hand side so that `str`,
// `&str`, `String` and `&String` values can be compared against it.
520 impl ::std::cmp::PartialEq<LocalInternedString> for str {
521 fn eq(&self, other: &LocalInternedString) -> bool {
526 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
527 fn eq(&self, other: &LocalInternedString) -> bool {
528 *self == other.string
532 impl ::std::cmp::PartialEq<LocalInternedString> for String {
533 fn eq(&self, other: &LocalInternedString) -> bool {
538 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
539 fn eq(&self, other: &LocalInternedString) -> bool {
540 *self == other.string
// Explicitly opt out of `Send` and `Sync`: the type's own documentation requires that it
// never crosses between threads.
544 impl !Send for LocalInternedString {}
545 impl !Sync for LocalInternedString {}
// Dereferences to the wrapped text.
547 impl ::std::ops::Deref for LocalInternedString {
549 fn deref(&self) -> &str { self.string }
// `Debug` and `Display` both forward to the corresponding `str` formatting.
552 impl fmt::Debug for LocalInternedString {
553 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
554 fmt::Debug::fmt(self.string, f)
558 impl fmt::Display for LocalInternedString {
559 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
560 fmt::Display::fmt(self.string, f)
// Decoding reads a string, interns it, and returns the interner's text for that symbol.
564 impl Decodable for LocalInternedString {
565 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
566 Ok(Symbol::intern(&d.read_str()?).as_str())
// Encoding emits the text itself.
570 impl Encodable for LocalInternedString {
571 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
572 s.emit_str(self.string)
576 /// Represents a string stored in the string interner
577 #[derive(Clone, Copy, Eq)]
578 pub struct InternedString {
582 impl InternedString {
/// Calls `f` with the text of this string and returns `f`'s result.
583 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
// The text is carried out of the `with_interner` closure as a raw pointer.
584 let str = with_interner(|interner| {
585 interner.get(self.symbol) as *const str
587 // This is safe because the interner keeps the string alive until it is dropped.
588 // We can access it because we know the interner is still alive since we use a
589 // scoped thread local to access it, and it was alive at the beginning of this scope
/// Returns the symbol for this string.
// NOTE(review): body not visible in this listing; presumably the stored symbol — confirm.
593 pub fn as_symbol(self) -> Symbol {
/// Returns the text as a `LocalInternedString`.
// NOTE(review): body not visible in this listing.
597 pub fn as_str(self) -> LocalInternedString {
// Hashes the text, not the symbol number.
602 impl Hash for InternedString {
603 fn hash<H: Hasher>(&self, state: &mut H) {
604 self.with(|str| str.hash(state))
// Ordering is by text. Identical symbols are reported equal up front, which avoids
// looking the text up in the interner.
608 impl PartialOrd<InternedString> for InternedString {
609 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
610 if self.symbol == other.symbol {
611 return Some(Ordering::Equal);
613 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
// Same scheme as `partial_cmp`: equal symbols short-circuit, otherwise compare the text.
617 impl Ord for InternedString {
618 fn cmp(&self, other: &InternedString) -> Ordering {
619 if self.symbol == other.symbol {
620 return Ordering::Equal;
622 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
// Comparison with anything that dereferences to `str`, by text.
626 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
627 fn eq(&self, other: &T) -> bool {
628 self.with(|string| string == other.deref())
// Two `InternedString`s are compared by symbol, without looking at the text.
632 impl PartialEq<InternedString> for InternedString {
633 fn eq(&self, other: &InternedString) -> bool {
634 self.symbol == other.symbol
// The impls below put `InternedString` on the right-hand side so that `str`, `&str`,
// `String` and `&String` values can be compared against it by text.
638 impl PartialEq<InternedString> for str {
639 fn eq(&self, other: &InternedString) -> bool {
640 other.with(|string| self == string)
644 impl<'a> PartialEq<InternedString> for &'a str {
645 fn eq(&self, other: &InternedString) -> bool {
646 other.with(|string| *self == string)
650 impl PartialEq<InternedString> for String {
651 fn eq(&self, other: &InternedString) -> bool {
652 other.with(|string| self == string)
656 impl<'a> PartialEq<InternedString> for &'a String {
657 fn eq(&self, other: &InternedString) -> bool {
658 other.with(|string| *self == string)
// Conversion to an owned `String` goes through the symbol's `to_string`.
662 impl ::std::convert::From<InternedString> for String {
663 fn from(val: InternedString) -> String {
664 val.as_symbol().to_string()
// `Debug` and `Display` both format the text obtained through `with`.
668 impl fmt::Debug for InternedString {
669 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
670 self.with(|str| fmt::Debug::fmt(&str, f))
674 impl fmt::Display for InternedString {
675 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
676 self.with(|str| fmt::Display::fmt(&str, f))
// Decoding reads a string, interns it, and converts the symbol with `as_interned_str`.
680 impl Decodable for InternedString {
681 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
682 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
// Encoding emits the text.
686 impl Encodable for InternedString {
687 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
688 self.with(|string| s.emit_str(string))
// Checks the numbering scheme of a new `Interner`: interned strings count up from 0,
// and gensyms count down from `u32::MAX` (4294967295).
698 fn interner_tests() {
699 let mut i: Interner = Interner::new();
700 // first one is zero:
701 assert_eq!(i.intern("dog"), Symbol(0));
702 // re-use gets the same entry:
703 assert_eq!(i.intern("dog"), Symbol(0));
704 // different string gets a different #:
705 assert_eq!(i.intern("cat"), Symbol(1));
706 assert_eq!(i.intern("cat"), Symbol(1));
707 // dog is still at zero
708 assert_eq!(i.intern("dog"), Symbol(0));
// first gensym gets the highest `u32` value:
709 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
710 // gensym of same string gets new number :
711 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
712 // gensym of *existing* string gets new number:
713 assert_eq!(i.gensym("dog"), Symbol(4294967293));
// Checks that stripping the leading quote from `'break` yields the `break` keyword's name.
717 fn without_first_quote_test() {
718 GLOBALS.set(&Globals::new(), || {
719 let i = Ident::from_str("'break");
720 assert_eq!(i.without_first_quote().name, keywords::Break.name());