1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with `u32` tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find its
13 //! tag, and vice versa.
16 use hygiene::SyntaxContext;
17 use {Span, DUMMY_SP, GLOBALS};
19 use rustc_data_structures::fx::FxHashMap;
20 use arena::DroplessArena;
21 use serialize::{Decodable, Decoder, Encodable, Encoder};
24 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
25 use std::hash::{Hash, Hasher};
// Derives for the identifier type. `PartialEq` is deliberately not derived: the hand-written
// `PartialEq for Ident` impl compares only the name and the syntax context of the span.
27 #[derive(Copy, Clone, Eq)]
/// Builds an identifier from a symbol and a span.
35 pub const fn new(name: Symbol, span: Span) -> Ident {
/// Builds an identifier from `name`, using `DUMMY_SP` as its span.
39 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
40 Ident::new(name, DUMMY_SP)
43 /// Maps an interned string to an identifier with an empty syntax context.
44 pub fn from_interned_str(string: InternedString) -> Ident {
45 Ident::with_empty_ctxt(string.as_symbol())
48 /// Maps a string to an identifier with an empty syntax context.
49 pub fn from_str(string: &str) -> Ident {
50 Ident::with_empty_ctxt(Symbol::intern(string))
53 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
54 pub fn with_span_pos(self, span: Span) -> Ident {
55 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
/// Returns an identifier with the same span whose name has every leading `'` character
/// trimmed off; the trimmed string is looked up through `Symbol::intern`.
58 pub fn without_first_quote(self) -> Ident {
59 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
62 /// "Normalize" ident for use in comparisons using "item hygiene".
63 /// Identifiers with same string value become same if they came from the same "modern" macro
64 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
65 /// different "modern" macros.
66 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
67 pub fn modern(self) -> Ident {
68 Ident::new(self.name, self.span.modern())
/// Returns an identifier with the same span whose name is a new gensym created from this
/// identifier's name (see `Symbol::gensymed`).
71 pub fn gensym(self) -> Ident {
72 Ident::new(self.name.gensymed(), self.span)
/// Returns a `LocalInternedString` for this identifier (presumably the string of its
/// name -- confirm against the body).
75 pub fn as_str(self) -> LocalInternedString {
/// Returns the `InternedString` of this identifier's name; forwards to
/// `Symbol::as_interned_str`.
79 pub fn as_interned_str(self) -> InternedString {
80 self.name.as_interned_str()
// Equality for identifiers looks only at the name and at the syntax context of the span;
// the remaining span data takes no part in the comparison.
84 impl PartialEq for Ident {
85 fn eq(&self, rhs: &Self) -> bool {
86 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
// Feeds the name and the span's syntax context into the hasher -- the same two values that
// `PartialEq for Ident` compares, so equal identifiers hash identically.
91 fn hash<H: Hasher>(&self, state: &mut H) {
92 self.name.hash(state);
93 self.span.ctxt().hash(state);
// Debug output is the name immediately followed by the `Debug` form of the span's
// syntax context.
97 impl fmt::Debug for Ident {
98 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
99 write!(f, "{}{:?}", self.name, self.span.ctxt())
// Display output is the name alone, formatted through `Symbol`'s `Display` impl.
103 impl fmt::Display for Ident {
104 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
105 fmt::Display::fmt(&self.name, f)
// Serializes an identifier as a string. The span itself is not written; only whether the
// modernized syntax context is empty influences the output.
109 impl Encodable for Ident {
110 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
// Empty modernized context: emit the plain identifier string.
111 if self.span.ctxt().modern() == SyntaxContext::empty() {
112 s.emit_str(&self.as_str())
113 } else { // FIXME(jseyfried) intercrate hygiene
// Non-empty context: build the string with a leading `#` marker, which
// `Decodable for Ident` tests for when reading the value back.
114 let mut string = "#".to_owned();
115 string.push_str(&self.as_str());
// Reads an identifier back from its string form. The decoded identifier always gets an
// empty syntax context (both branches go through `with_empty_ctxt` / `from_str`).
121 impl Decodable for Ident {
122 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
123 let string = d.read_str()?;
// No leading `#`: intern the string as an ordinary symbol.
124 Ok(if !string.starts_with('#') {
125 Ident::from_str(&string)
126 } else { // FIXME(jseyfried) intercrate hygiene
// Leading `#`: skip the one-byte marker and create a gensym from the remainder.
127 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
132 /// A symbol is an interned or gensymed string, stored as a `u32` tag that the interner
/// maps back to the string. The derived `PartialOrd`/`Ord` therefore order symbols by tag
/// value, not by string contents.
133 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
134 pub struct Symbol(u32);
136 // The interner is pointed to by a thread local value which is only set on the main thread
137 // when parallelization is disabled. So we don't allow Symbol to transfer between threads
138 // to avoid panics and other errors, even though it would be memory safe to do so.
139 #[cfg(not(parallel_queries))]
140 impl !Send for Symbol { }
141 #[cfg(not(parallel_queries))]
142 impl !Sync for Symbol { }
145 /// Maps a string to its interned representation.
146 pub fn intern(string: &str) -> Self {
147 with_interner(|interner| interner.intern(string))
/// Resolves this symbol through `Interner::interned`, which follows a gensym back to the
/// symbol it was created from.
150 pub fn interned(self) -> Self {
151 with_interner(|interner| interner.interned(self))
154 /// Gensyms a new symbol for `string`, using the current interner.
155 pub fn gensym(string: &str) -> Self {
156 with_interner(|interner| interner.gensym(string))
/// Creates a new gensym that refers to this symbol, using the current interner.
159 pub fn gensymed(self) -> Self {
160 with_interner(|interner| interner.gensymed(self))
/// Returns the symbol's string as a `LocalInternedString`.
163 pub fn as_str(self) -> LocalInternedString {
// The transmute changes only the lifetime: the `&str` borrowed from the interner is
// stored in `LocalInternedString`'s `&'static str` field.
164 with_interner(|interner| unsafe {
165 LocalInternedString {
166 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
/// Returns an `InternedString` whose symbol is this symbol resolved through
/// `Interner::interned`.
171 pub fn as_interned_str(self) -> InternedString {
172 with_interner(|interner| InternedString {
173 symbol: interner.interned(self)
/// Returns the symbol as a `u32` (presumably the raw tag held in the tuple field --
/// confirm against the body).
177 pub fn as_u32(self) -> u32 {
// Debug formatting first asks the interner whether the symbol is a gensym. Two output
// shapes exist: `string(raw tag)` and plain `string`.
// NOTE(review): presumably the tagged form is the one used when `is_gensymed` is true --
// confirm against the branch between the two `write!` calls.
182 impl fmt::Debug for Symbol {
183 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
184 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
186 write!(f, "{}({})", self, self.0)
188 write!(f, "{}", self)
// Display output is the symbol's string, obtained via `as_str`.
193 impl fmt::Display for Symbol {
194 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
195 fmt::Display::fmt(&self.as_str(), f)
// A symbol is serialized as its string, not as its numeric tag.
199 impl Encodable for Symbol {
200 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
201 s.emit_str(&self.as_str())
// Decoding reads the string and interns it, so the result is always an ordinary interned
// symbol (`Symbol::intern`), never a gensym.
205 impl Decodable for Symbol {
206 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
207 Ok(Symbol::intern(&d.read_str()?))
// Lets a symbol be compared with any type that dereferences to `str`; the comparison is
// done on the symbol's string contents.
211 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
212 fn eq(&self, other: &T) -> bool {
213 self.as_str() == other.deref()
217 // The &'static strs in this type actually point into the arena
218 pub struct Interner {
// Storage that `intern` copies string bytes into.
219 arena: DroplessArena,
// String -> symbol lookup, consulted first by `intern`.
220 names: FxHashMap<&'static str, Symbol>,
// Symbol tag -> string; an interned symbol's tag is its index in this vector.
221 strings: Vec<&'static str>,
// Target symbol of each gensym; gensym tags count down from `!0` and are mapped to
// indices here as `!0 - tag`.
222 gensyms: Vec<Symbol>,
/// Creates an interner with an empty arena and empty tables.
226 pub fn new() -> Self {
228 arena: DroplessArena::new(),
229 names: Default::default(),
230 strings: Default::default(),
231 gensyms: Default::default(),
/// Creates an interner and walks `init` in order to pre-populate it.
235 fn prefill(init: &[&str]) -> Self {
236 let mut this = Interner::new();
237 for &string in init {
239 // We can't allocate empty strings in the arena, so handle this here
// The empty string is registered directly with the static literal `""` and the next
// free tag, bypassing the arena.
240 let name = Symbol(this.strings.len() as u32);
241 this.names.insert("", name);
242 this.strings.push("");
/// Interns `string`: looks it up in `names` first, and otherwise assigns the next tag
/// (the current length of `strings`), copies the bytes into the arena and records the
/// new entry in both tables.
250 pub fn intern(&mut self, string: &str) -> Symbol {
251 if let Some(&name) = self.names.get(string) {
255 let name = Symbol(self.strings.len() as u32);
257 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
258 let string: &str = unsafe {
259 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
261 // It is safe to extend the arena allocation to 'static because we only access
262 // these while the arena is still alive
263 let string: &'static str = unsafe {
264 &*(string as *const str)
// The same arena-backed slice is stored in both tables.
266 self.strings.push(string);
267 self.names.insert(string, name);
/// Resolves `symbol` to an interned (non-gensym) symbol. A tag below `strings.len()`
/// denotes an interned symbol; any other tag is treated as a gensym, whose target is
/// looked up at index `!0 - tag` in `gensyms` and resolved recursively.
271 pub fn interned(&self, symbol: Symbol) -> Symbol {
272 if (symbol.0 as usize) < self.strings.len() {
275 self.interned(self.gensyms[(!0 - symbol.0) as usize])
/// Interns `string` and returns a new gensym that refers to the resulting symbol.
279 fn gensym(&mut self, string: &str) -> Symbol {
280 let symbol = self.intern(string);
281 self.gensymed(symbol)
/// Records `symbol` as the target of a new gensym and returns the gensym's tag.
284 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
285 self.gensyms.push(symbol);
// After the push the length is k, so the tag is `!0 - (k - 1)`: the first gensym gets
// `!0`, the next `!0 - 1`, and so on. This is the inverse of the `!0 - tag` indexing
// used when resolving a gensym.
286 Symbol(!0 - self.gensyms.len() as u32 + 1)
/// Returns `true` when the tag is not an index into `strings`, i.e. the symbol is a
/// gensym rather than an interned string.
// NOTE(review): takes `&mut self` although the body only reads `self.strings.len()`.
289 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
290 symbol.0 as usize >= self.strings.len()
/// Returns the string for `symbol`. Interned symbols index `strings` directly; for any
/// other tag the gensym's target is fetched from `gensyms` (index `!0 - tag`) and looked
/// up recursively. The `gensyms` indexing panics if the tag is out of range.
293 pub fn get(&self, symbol: Symbol) -> &str {
294 match self.strings.get(symbol.0 as usize) {
295 Some(string) => string,
296 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
301 // In this macro, there is the requirement that the name (the number) must be monotonically
302 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
303 // except starting from the next number instead of zero.
304 macro_rules! declare_keywords {(
// Each entry is a parenthesized triple: numeric index, constant name, keyword string.
305 $( ($index: expr, $konst: ident, $string: expr) )*
308 use super::{Symbol, Ident};
309 #[derive(Clone, Copy, PartialEq, Eq)]
// Accessors for the keyword's identifier and for that identifier's symbol.
314 #[inline] pub fn ident(self) -> Ident { self.ident }
315 #[inline] pub fn name(self) -> Symbol { self.ident.name }
// One constant per entry; its symbol is built directly from the entry's index, so the
// index must equal the tag the string receives when the interner is prefilled.
318 #[allow(non_upper_case_globals)]
319 pub const $konst: Keyword = Keyword {
320 ident: Ident::with_empty_ctxt(super::Symbol($index))
// Parsing a keyword from text: one match arm per entry maps the string to its constant.
324 impl ::std::str::FromStr for Keyword {
327 fn from_str(s: &str) -> Result<Self, ()> {
329 $($string => Ok($konst),)*
// Builds an interner prefilled with every keyword string, in declaration order.
337 pub fn fresh() -> Self {
338 Interner::prefill(&[$($string,)*])
// Each entry below is `(index, constant name, string)`, the shape matched by
// `declare_keywords!`.
343 // NB: leaving holes in the ident table is bad! a different ident will get
344 // interned with the id from the hole, but it will be between the min and max
345 // of the reserved words, and thus tagged as "reserved".
346 // After modifying this list adjust `is_special`, `is_used_keyword`/`is_unused_keyword`,
347 // this should be rarely necessary though if the keywords are kept in alphabetic order.
349 // Special reserved identifiers used internally for elided lifetimes,
350 // unnamed method parameters, crate root module, error recovery etc.
352 (1, CrateRoot, "{{root}}")
353 (2, DollarCrate, "$crate")
356 // Keywords used in the language.
361 (8, Continue, "continue")
365 (12, Extern, "extern")
380 (27, Return, "return")
381 (28, SelfValue, "self")
382 (29, SelfType, "Self")
383 (30, Static, "static")
384 (31, Struct, "struct")
389 (36, Unsafe, "unsafe")
394 // Keywords reserved for future use.
395 (40, Abstract, "abstract")
396 (41, Become, "become")
400 (45, Override, "override")
402 (47, Typeof, "typeof")
403 (48, Unsized, "unsized")
404 (49, Virtual, "virtual")
407 // Edition-specific keywords reserved for future use.
408 (51, Async, "async") // >= 2018 Edition Only
410 // Special lifetime names
411 (52, UnderscoreLifetime, "'_")
412 (53, StaticLifetime, "'static")
414 // Weak keywords, have special meaning only in specific contexts.
417 (56, Default, "default")
/// Returns `true` if this symbol is the `async` keyword, the edition-specific reserved
/// word checked by `Ident::is_unused_keyword` for the 2018 edition.
423 fn is_unused_keyword_2018(self) -> bool {
424 self == keywords::Async.name()
// The range checks below compare symbols with `<=`/`>=`. `Symbol` derives its ordering
// from the `u32` tag, so each check tests whether the tag falls inside a contiguous
// section of the keyword table.
429 // Returns true for reserved identifiers used internally for elided lifetimes,
430 // unnamed method parameters, crate root module, error recovery etc.
431 pub fn is_special(self) -> bool {
432 self.name <= keywords::Underscore.name()
435 /// Returns `true` if the token is a keyword used in the language.
436 pub fn is_used_keyword(self) -> bool {
437 self.name >= keywords::As.name() && self.name <= keywords::While.name()
440 /// Returns `true` if the token is a keyword reserved for possible future use.
441 pub fn is_unused_keyword(self) -> bool {
442 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
// `&&` binds tighter than `||`: the edition is only queried when the name is outside
// the `Abstract..=Yield` range and is the 2018-only keyword.
443 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
444 self.name.is_unused_keyword_2018() && self.span.edition() == Edition::Edition2018
447 /// Returns `true` if the token is either a special identifier or a keyword.
448 pub fn is_reserved(self) -> bool {
449 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
452 /// A keyword or reserved identifier that can be used as a path segment.
453 pub fn is_path_segment_keyword(self) -> bool {
454 self.name == keywords::Super.name() ||
455 self.name == keywords::SelfValue.name() ||
456 self.name == keywords::SelfType.name() ||
457 self.name == keywords::Extern.name() ||
458 self.name == keywords::Crate.name() ||
459 self.name == keywords::CrateRoot.name() ||
460 self.name == keywords::DollarCrate.name()
463 // We see this identifier in a normal identifier position, like variable name or a type.
464 // How was it written originally? Did it use the raw form? Let's try to guess.
// The guess is "raw" for any reserved identifier other than `Invalid` that is not also
// usable as a path segment.
465 pub fn is_raw_guess(self) -> bool {
466 self.name != keywords::Invalid.name() &&
467 self.is_reserved() && !self.is_path_segment_keyword()
471 // Runs `f` with mutable access to the interner stored in `GLOBALS`, locking
// `symbol_interner` for the duration of the call and returning whatever `f` returns.
// NOTE(review): this comment used to say a fresh interner is prepared when none exists;
// the code here only locks and lends out the interner held by `GLOBALS`.
473 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
474 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
477 /// Represents a string stored in the interner. Because the interner outlives any thread
478 /// which uses this type, we can safely treat `string` which points to interner data,
479 /// as an immortal string, as long as this type never crosses between threads.
480 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
481 // by creating a new thread right after constructing the interner
// The derived `Hash`, `PartialOrd` and `Ord` operate on the `&'static str` field, i.e. on
// string contents.
482 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
483 pub struct LocalInternedString {
484 string: &'static str,
487 impl LocalInternedString {
/// Converts to an `InternedString` by interning the string contents again.
488 pub fn as_interned_str(self) -> InternedString {
490 symbol: Symbol::intern(self.string)
// Forwards every `AsRef<U>` conversion that `str` itself supports.
495 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
497 str: ::std::convert::AsRef<U>
499 fn as_ref(&self) -> &U {
// Comparison with anything that dereferences to `str` (including another
// `LocalInternedString`) is by string contents.
504 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
505 fn eq(&self, other: &T) -> bool {
506 self.string == other.deref()
// The impls below provide the reverse direction, with a plain string type on the left.
510 impl ::std::cmp::PartialEq<LocalInternedString> for str {
511 fn eq(&self, other: &LocalInternedString) -> bool {
516 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
517 fn eq(&self, other: &LocalInternedString) -> bool {
518 *self == other.string
522 impl ::std::cmp::PartialEq<LocalInternedString> for String {
523 fn eq(&self, other: &LocalInternedString) -> bool {
528 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
529 fn eq(&self, other: &LocalInternedString) -> bool {
530 *self == other.string
// Opting out of `Send`/`Sync` keeps the value on the thread that created it, which is the
// condition the type's documentation gives for treating the string as immortal.
534 impl !Send for LocalInternedString {}
535 impl !Sync for LocalInternedString {}
// Dereferences to the underlying `str`.
537 impl ::std::ops::Deref for LocalInternedString {
539 fn deref(&self) -> &str { self.string }
// Both formatting traits delegate to the `str` implementations.
542 impl fmt::Debug for LocalInternedString {
543 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
544 fmt::Debug::fmt(self.string, f)
548 impl fmt::Display for LocalInternedString {
549 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
550 fmt::Display::fmt(self.string, f)
// Decoding reads a string, interns it, and returns the interner-backed string for the
// resulting symbol.
554 impl Decodable for LocalInternedString {
555 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
556 Ok(Symbol::intern(&d.read_str()?).as_str())
// Encoding writes the string contents.
560 impl Encodable for LocalInternedString {
561 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
562 s.emit_str(self.string)
566 /// Represents a string stored in the string interner
// `PartialEq` is implemented by hand further down and compares symbols, which is why only
// `Eq` is derived here.
567 #[derive(Clone, Copy, Eq)]
568 pub struct InternedString {
572 impl InternedString {
/// Calls `f` with the string this value refers to and returns `f`'s result.
573 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
// The string is carried out of the interner closure as a raw pointer, so the borrow of
// the interner does not extend past the closure.
574 let str = with_interner(|interner| {
575 interner.get(self.symbol) as *const str
577 // This is safe because the interner keeps string alive until it is dropped.
578 // We can access it because we know the interner is still alive since we use a
579 // scoped thread local to access it, and it was alive at the beginning of this scope
/// Returns a `Symbol` for this string (presumably the stored `symbol` field -- confirm
/// against the body).
583 pub fn as_symbol(self) -> Symbol {
/// Returns a `LocalInternedString` for this string.
587 pub fn as_str(self) -> LocalInternedString {
// Hashing uses the string contents rather than the symbol tag.
592 impl Hash for InternedString {
593 fn hash<H: Hasher>(&self, state: &mut H) {
594 self.with(|str| str.hash(state))
// Ordering is by string contents, unlike `Symbol`, whose derived ordering is by tag.
// Identical symbols short-circuit to `Equal` without touching the interner.
598 impl PartialOrd<InternedString> for InternedString {
599 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
600 if self.symbol == other.symbol {
601 return Some(Ordering::Equal);
603 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
// Same scheme as `partial_cmp`: symbol fast path first, then a string comparison.
607 impl Ord for InternedString {
608 fn cmp(&self, other: &InternedString) -> Ordering {
609 if self.symbol == other.symbol {
610 return Ordering::Equal;
612 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
// Comparison with anything that dereferences to `str` looks the string up and compares
// contents.
616 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
617 fn eq(&self, other: &T) -> bool {
618 self.with(|string| string == other.deref())
// Two interned strings are equal exactly when their symbols are equal; no string
// comparison is performed.
622 impl PartialEq<InternedString> for InternedString {
623 fn eq(&self, other: &InternedString) -> bool {
624 self.symbol == other.symbol
// Reverse-direction comparisons with a plain string type on the left; each one looks the
// interned string up via `with` and compares contents.
628 impl PartialEq<InternedString> for str {
629 fn eq(&self, other: &InternedString) -> bool {
630 other.with(|string| self == string)
634 impl<'a> PartialEq<InternedString> for &'a str {
635 fn eq(&self, other: &InternedString) -> bool {
636 other.with(|string| *self == string)
640 impl PartialEq<InternedString> for String {
641 fn eq(&self, other: &InternedString) -> bool {
642 other.with(|string| self == string)
646 impl<'a> PartialEq<InternedString> for &'a String {
647 fn eq(&self, other: &InternedString) -> bool {
648 other.with(|string| *self == string)
// Converts to an owned `String` by formatting the symbol (`to_string` goes through
// `Symbol`'s `Display` impl).
652 impl ::std::convert::From<InternedString> for String {
653 fn from(val: InternedString) -> String {
654 val.as_symbol().to_string()
// Both formatting traits look the string up via `with` and delegate to the `str`
// implementations.
658 impl fmt::Debug for InternedString {
659 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
660 self.with(|str| fmt::Debug::fmt(&str, f))
664 impl fmt::Display for InternedString {
665 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
666 self.with(|str| fmt::Display::fmt(&str, f))
// Decoding reads a string, interns it, and wraps the resulting symbol.
670 impl Decodable for InternedString {
671 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
672 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
// Encoding writes the string contents, not the symbol tag.
676 impl Encodable for InternedString {
677 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
678 self.with(|string| s.emit_str(string))
// Exercises a bare `Interner::new()` (no prefilled keywords): interned strings get tags
// counting up from 0, gensyms get tags counting down from `u32::MAX` (4294967295).
688 fn interner_tests() {
689 let mut i: Interner = Interner::new();
690 // first one is zero:
691 assert_eq!(i.intern("dog"), Symbol(0));
692 // re-use gets the same entry:
693 assert_eq!(i.intern("dog"), Symbol(0));
694 // different string gets a different #:
695 assert_eq!(i.intern("cat"), Symbol(1));
696 assert_eq!(i.intern("cat"), Symbol(1));
697 // dog is still at zero
698 assert_eq!(i.intern("dog"), Symbol(0));
// first gensym takes the highest tag:
699 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
700 // gensym of same string gets new number :
701 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
702 // gensym of *existing* string gets new number:
703 assert_eq!(i.gensym("dog"), Symbol(4294967293));
// Checks that stripping the leading quote from `'break` yields the symbol of the `break`
// keyword. Runs inside `GLOBALS.set` so that `Symbol::intern` has an interner to use.
707 fn without_first_quote_test() {
708 GLOBALS.set(&Globals::new(), || {
709 let i = Ident::from_str("'break");
710 assert_eq!(i.without_first_quote().name, keywords::Break.name());