1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
//! An "interner" is a data structure that associates values with integer tags and
//! allows bidirectional lookup; i.e. given a value, one can easily find the
//! tag, and vice versa.
16 use hygiene::SyntaxContext;
17 use {Span, DUMMY_SP, GLOBALS};
19 use rustc_data_structures::fx::FxHashMap;
20 use arena::DroplessArena;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
24 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
25 use std::hash::{Hash, Hasher};
27 #[derive(Copy, Clone, Eq)]
35 pub const fn new(name: Symbol, span: Span) -> Ident {
39 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
40 Ident::new(name, DUMMY_SP)
43 /// Maps an interned string to an identifier with an empty syntax context.
44 pub fn from_interned_str(string: InternedString) -> Ident {
45 Ident::with_empty_ctxt(string.as_symbol())
48 /// Maps a string to an identifier with an empty syntax context.
49 pub fn from_str(string: &str) -> Ident {
50 Ident::with_empty_ctxt(Symbol::intern(string))
53 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
54 pub fn with_span_pos(self, span: Span) -> Ident {
55 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
58 pub fn without_first_quote(self) -> Ident {
59 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
62 /// "Normalize" ident for use in comparisons using "item hygiene".
63 /// Identifiers with same string value become same if they came from the same "modern" macro
64 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
65 /// different "modern" macros.
66 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
67 pub fn modern(self) -> Ident {
68 Ident::new(self.name, self.span.modern())
71 // "Normalize" ident for use in comparisons using "local variable hygiene".
72 // Identifiers with same string value become same if they came from the same non-transparent
73 // macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
74 // non-transparent macros.
75 // Technically, this operation strips all transparent marks from ident's syntactic context.
76 pub fn modern_and_legacy(self) -> Ident {
77 Ident::new(self.name, self.span.modern_and_legacy())
80 pub fn gensym(self) -> Ident {
81 Ident::new(self.name.gensymed(), self.span)
84 pub fn as_str(self) -> LocalInternedString {
88 pub fn as_interned_str(self) -> InternedString {
89 self.name.as_interned_str()
93 impl PartialEq for Ident {
94 fn eq(&self, rhs: &Self) -> bool {
95 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
100 fn hash<H: Hasher>(&self, state: &mut H) {
101 self.name.hash(state);
102 self.span.ctxt().hash(state);
106 impl fmt::Debug for Ident {
107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 write!(f, "{}{:?}", self.name, self.span.ctxt())
112 impl fmt::Display for Ident {
113 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114 fmt::Display::fmt(&self.name, f)
118 impl Encodable for Ident {
119 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
120 if self.span.ctxt().modern() == SyntaxContext::empty() {
121 s.emit_str(&self.as_str())
122 } else { // FIXME(jseyfried) intercrate hygiene
123 let mut string = "#".to_owned();
124 string.push_str(&self.as_str());
130 impl Decodable for Ident {
131 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
132 let string = d.read_str()?;
133 Ok(if !string.starts_with('#') {
134 Ident::from_str(&string)
135 } else { // FIXME(jseyfried) intercrate hygiene
136 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
141 /// A symbol is an interned or gensymed string.
142 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
143 pub struct Symbol(u32);
// The interner is pointed to by a thread local value which is only set on the main thread
// when parallelization is disabled. So we don't allow Symbol to transfer between threads
// to avoid panics and other errors, even though it would be memory safe to do so.
148 #[cfg(not(parallel_queries))]
149 impl !Send for Symbol { }
150 #[cfg(not(parallel_queries))]
151 impl !Sync for Symbol { }
154 /// Maps a string to its interned representation.
155 pub fn intern(string: &str) -> Self {
156 with_interner(|interner| interner.intern(string))
159 pub fn interned(self) -> Self {
160 with_interner(|interner| interner.interned(self))
163 /// gensym's a new usize, using the current interner.
164 pub fn gensym(string: &str) -> Self {
165 with_interner(|interner| interner.gensym(string))
168 pub fn gensymed(self) -> Self {
169 with_interner(|interner| interner.gensymed(self))
172 pub fn as_str(self) -> LocalInternedString {
173 with_interner(|interner| unsafe {
174 LocalInternedString {
175 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
180 pub fn as_interned_str(self) -> InternedString {
181 with_interner(|interner| InternedString {
182 symbol: interner.interned(self)
186 pub fn as_u32(self) -> u32 {
191 impl fmt::Debug for Symbol {
192 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
193 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
195 write!(f, "{}({})", self, self.0)
197 write!(f, "{}", self)
202 impl fmt::Display for Symbol {
203 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
204 fmt::Display::fmt(&self.as_str(), f)
208 impl Encodable for Symbol {
209 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
210 s.emit_str(&self.as_str())
214 impl Decodable for Symbol {
215 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
216 Ok(Symbol::intern(&d.read_str()?))
220 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
221 fn eq(&self, other: &T) -> bool {
222 self.as_str() == other.deref()
226 // The &'static strs in this type actually point into the arena
227 pub struct Interner {
228 arena: DroplessArena,
229 names: FxHashMap<&'static str, Symbol>,
230 strings: Vec<&'static str>,
231 gensyms: Vec<Symbol>,
235 pub fn new() -> Self {
237 arena: DroplessArena::new(),
238 names: Default::default(),
239 strings: Default::default(),
240 gensyms: Default::default(),
244 fn prefill(init: &[&str]) -> Self {
245 let mut this = Interner::new();
246 for &string in init {
248 // We can't allocate empty strings in the arena, so handle this here
249 let name = Symbol(this.strings.len() as u32);
250 this.names.insert("", name);
251 this.strings.push("");
259 pub fn intern(&mut self, string: &str) -> Symbol {
260 if let Some(&name) = self.names.get(string) {
264 let name = Symbol(self.strings.len() as u32);
266 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
267 let string: &str = unsafe {
268 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
270 // It is safe to extend the arena allocation to 'static because we only access
271 // these while the arena is still alive
272 let string: &'static str = unsafe {
273 &*(string as *const str)
275 self.strings.push(string);
276 self.names.insert(string, name);
280 pub fn interned(&self, symbol: Symbol) -> Symbol {
281 if (symbol.0 as usize) < self.strings.len() {
284 self.interned(self.gensyms[(!0 - symbol.0) as usize])
288 fn gensym(&mut self, string: &str) -> Symbol {
289 let symbol = self.intern(string);
290 self.gensymed(symbol)
293 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
294 self.gensyms.push(symbol);
295 Symbol(!0 - self.gensyms.len() as u32 + 1)
298 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
299 symbol.0 as usize >= self.strings.len()
302 pub fn get(&self, symbol: Symbol) -> &str {
303 match self.strings.get(symbol.0 as usize) {
304 Some(string) => string,
305 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
310 // In this macro, there is the requirement that the name (the number) must be monotonically
311 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
312 // except starting from the next number instead of zero.
313 macro_rules! declare_keywords {(
314 $( ($index: expr, $konst: ident, $string: expr) )*
317 use super::{Symbol, Ident};
318 #[derive(Clone, Copy, PartialEq, Eq)]
323 #[inline] pub fn ident(self) -> Ident { self.ident }
324 #[inline] pub fn name(self) -> Symbol { self.ident.name }
327 #[allow(non_upper_case_globals)]
328 pub const $konst: Keyword = Keyword {
329 ident: Ident::with_empty_ctxt(super::Symbol($index))
333 impl ::std::str::FromStr for Keyword {
336 fn from_str(s: &str) -> Result<Self, ()> {
338 $($string => Ok($konst),)*
346 pub fn fresh() -> Self {
347 Interner::prefill(&[$($string,)*])
352 // NB: leaving holes in the ident table is bad! a different ident will get
353 // interned with the id from the hole, but it will be between the min and max
354 // of the reserved words, and thus tagged as "reserved".
// After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
// this should rarely be necessary, though, if the keywords are kept in alphabetic order.
358 // Special reserved identifiers used internally for elided lifetimes,
359 // unnamed method parameters, crate root module, error recovery etc.
361 (1, CrateRoot, "{{root}}")
362 (2, DollarCrate, "$crate")
365 // Keywords used in the language.
370 (8, Continue, "continue")
374 (12, Extern, "extern")
389 (27, Return, "return")
390 (28, SelfValue, "self")
391 (29, SelfType, "Self")
392 (30, Static, "static")
393 (31, Struct, "struct")
398 (36, Unsafe, "unsafe")
403 // Keywords reserved for future use.
404 (40, Abstract, "abstract")
405 (41, Become, "become")
409 (45, Override, "override")
411 (47, Typeof, "typeof")
412 (48, Unsized, "unsized")
413 (49, Virtual, "virtual")
416 // Edition-specific keywords reserved for future use.
417 (51, Async, "async") // >= 2018 Edition Only
419 // Special lifetime names
420 (52, UnderscoreLifetime, "'_")
421 (53, StaticLifetime, "'static")
423 // Weak keywords, have special meaning only in specific contexts.
426 (56, Default, "default")
432 fn is_unused_keyword_2018(self) -> bool {
433 self == keywords::Async.name()
438 // Returns true for reserved identifiers used internally for elided lifetimes,
439 // unnamed method parameters, crate root module, error recovery etc.
440 pub fn is_special(self) -> bool {
441 self.name <= keywords::Underscore.name()
444 /// Returns `true` if the token is a keyword used in the language.
445 pub fn is_used_keyword(self) -> bool {
446 self.name >= keywords::As.name() && self.name <= keywords::While.name()
449 /// Returns `true` if the token is a keyword reserved for possible future use.
450 pub fn is_unused_keyword(self) -> bool {
451 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
452 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
453 self.name.is_unused_keyword_2018() && self.span.edition() == Edition::Edition2018
456 /// Returns `true` if the token is either a special identifier or a keyword.
457 pub fn is_reserved(self) -> bool {
458 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
461 /// A keyword or reserved identifier that can be used as a path segment.
462 pub fn is_path_segment_keyword(self) -> bool {
463 self.name == keywords::Super.name() ||
464 self.name == keywords::SelfValue.name() ||
465 self.name == keywords::SelfType.name() ||
466 self.name == keywords::Extern.name() ||
467 self.name == keywords::Crate.name() ||
468 self.name == keywords::CrateRoot.name() ||
469 self.name == keywords::DollarCrate.name()
472 // We see this identifier in a normal identifier position, like variable name or a type.
473 // How was it written originally? Did it use the raw form? Let's try to guess.
474 pub fn is_raw_guess(self) -> bool {
475 self.name != keywords::Invalid.name() &&
476 self.is_reserved() && !self.is_path_segment_keyword()
// Runs `f` with mutable access to the symbol interner held by the current `GLOBALS`.
482 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
483 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
486 /// Represents a string stored in the interner. Because the interner outlives any thread
487 /// which uses this type, we can safely treat `string` which points to interner data,
488 /// as an immortal string, as long as this type never crosses between threads.
489 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
490 // by creating a new thread right after constructing the interner
491 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
492 pub struct LocalInternedString {
493 string: &'static str,
496 impl LocalInternedString {
497 pub fn as_interned_str(self) -> InternedString {
499 symbol: Symbol::intern(self.string)
504 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
506 str: ::std::convert::AsRef<U>
508 fn as_ref(&self) -> &U {
513 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
514 fn eq(&self, other: &T) -> bool {
515 self.string == other.deref()
519 impl ::std::cmp::PartialEq<LocalInternedString> for str {
520 fn eq(&self, other: &LocalInternedString) -> bool {
525 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
526 fn eq(&self, other: &LocalInternedString) -> bool {
527 *self == other.string
531 impl ::std::cmp::PartialEq<LocalInternedString> for String {
532 fn eq(&self, other: &LocalInternedString) -> bool {
537 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
538 fn eq(&self, other: &LocalInternedString) -> bool {
539 *self == other.string
543 impl !Send for LocalInternedString {}
544 impl !Sync for LocalInternedString {}
546 impl ::std::ops::Deref for LocalInternedString {
548 fn deref(&self) -> &str { self.string }
551 impl fmt::Debug for LocalInternedString {
552 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
553 fmt::Debug::fmt(self.string, f)
557 impl fmt::Display for LocalInternedString {
558 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
559 fmt::Display::fmt(self.string, f)
563 impl Decodable for LocalInternedString {
564 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
565 Ok(Symbol::intern(&d.read_str()?).as_str())
569 impl Encodable for LocalInternedString {
570 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
571 s.emit_str(self.string)
575 /// Represents a string stored in the string interner
576 #[derive(Clone, Copy, Eq)]
577 pub struct InternedString {
581 impl InternedString {
582 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
583 let str = with_interner(|interner| {
584 interner.get(self.symbol) as *const str
586 // This is safe because the interner keeps string alive until it is dropped.
587 // We can access it because we know the interner is still alive since we use a
// scoped thread local to access it, and it was alive at the beginning of this scope
592 pub fn as_symbol(self) -> Symbol {
596 pub fn as_str(self) -> LocalInternedString {
601 impl Hash for InternedString {
602 fn hash<H: Hasher>(&self, state: &mut H) {
603 self.with(|str| str.hash(state))
607 impl PartialOrd<InternedString> for InternedString {
608 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
609 if self.symbol == other.symbol {
610 return Some(Ordering::Equal);
612 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
616 impl Ord for InternedString {
617 fn cmp(&self, other: &InternedString) -> Ordering {
618 if self.symbol == other.symbol {
619 return Ordering::Equal;
621 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
625 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
626 fn eq(&self, other: &T) -> bool {
627 self.with(|string| string == other.deref())
631 impl PartialEq<InternedString> for InternedString {
632 fn eq(&self, other: &InternedString) -> bool {
633 self.symbol == other.symbol
637 impl PartialEq<InternedString> for str {
638 fn eq(&self, other: &InternedString) -> bool {
639 other.with(|string| self == string)
643 impl<'a> PartialEq<InternedString> for &'a str {
644 fn eq(&self, other: &InternedString) -> bool {
645 other.with(|string| *self == string)
649 impl PartialEq<InternedString> for String {
650 fn eq(&self, other: &InternedString) -> bool {
651 other.with(|string| self == string)
655 impl<'a> PartialEq<InternedString> for &'a String {
656 fn eq(&self, other: &InternedString) -> bool {
657 other.with(|string| *self == string)
661 impl ::std::convert::From<InternedString> for String {
662 fn from(val: InternedString) -> String {
663 val.as_symbol().to_string()
667 impl fmt::Debug for InternedString {
668 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
669 self.with(|str| fmt::Debug::fmt(&str, f))
673 impl fmt::Display for InternedString {
674 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
675 self.with(|str| fmt::Display::fmt(&str, f))
679 impl Decodable for InternedString {
680 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
681 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
685 impl Encodable for InternedString {
686 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
687 self.with(|string| s.emit_str(string))
697 fn interner_tests() {
698 let mut i: Interner = Interner::new();
699 // first one is zero:
700 assert_eq!(i.intern("dog"), Symbol(0));
701 // re-use gets the same entry:
702 assert_eq!(i.intern("dog"), Symbol(0));
703 // different string gets a different #:
704 assert_eq!(i.intern("cat"), Symbol(1));
705 assert_eq!(i.intern("cat"), Symbol(1));
706 // dog is still at zero
707 assert_eq!(i.intern("dog"), Symbol(0));
708 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
709 // gensym of same string gets new number :
710 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
711 // gensym of *existing* string gets new number:
712 assert_eq!(i.gensym("dog"), Symbol(4294967293));
716 fn without_first_quote_test() {
717 GLOBALS.set(&Globals::new(), || {
718 let i = Ident::from_str("'break");
719 assert_eq!(i.without_first_quote().name, keywords::Break.name());