1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with integer tags and
12 //! allows bidirectional lookup; i.e., given a value, one can easily find its
13 //! tag, and vice versa.
15 use hygiene::SyntaxContext;
16 use {Span, DUMMY_SP, GLOBALS};
18 use rustc_data_structures::fx::FxHashMap;
19 use arena::DroplessArena;
20 use serialize::{Decodable, Decoder, Encodable, Encoder};
23 use std::cmp::{PartialEq, Ordering, PartialOrd, Ord};
24 use std::hash::{Hash, Hasher};
26 #[derive(Copy, Clone, Eq)]
34 pub const fn new(name: Symbol, span: Span) -> Ident {
38 pub const fn with_empty_ctxt(name: Symbol) -> Ident {
39 Ident::new(name, DUMMY_SP)
42 /// Maps an interned string to an identifier with an empty syntax context.
43 pub fn from_interned_str(string: InternedString) -> Ident {
44 Ident::with_empty_ctxt(string.as_symbol())
47 /// Maps a string to an identifier with an empty syntax context.
48 pub fn from_str(string: &str) -> Ident {
49 Ident::with_empty_ctxt(Symbol::intern(string))
52 /// Replace `lo` and `hi` with those from `span`, but keep hygiene context.
53 pub fn with_span_pos(self, span: Span) -> Ident {
54 Ident::new(self.name, span.with_ctxt(self.span.ctxt()))
57 pub fn without_first_quote(self) -> Ident {
58 Ident::new(Symbol::intern(self.as_str().trim_left_matches('\'')), self.span)
61 /// "Normalize" ident for use in comparisons using "item hygiene".
62 /// Identifiers with same string value become same if they came from the same "modern" macro
63 /// (e.g. `macro` item, but not `macro_rules` item) and stay different if they came from
64 /// different "modern" macros.
65 /// Technically, this operation strips all non-opaque marks from ident's syntactic context.
66 pub fn modern(self) -> Ident {
67 Ident::new(self.name, self.span.modern())
70 /// "Normalize" ident for use in comparisons using "local variable hygiene".
71 /// Identifiers with same string value become same if they came from the same non-transparent
72 /// macro (e.g. `macro` or `macro_rules!` items) and stay different if they came from different
73 /// non-transparent macros.
74 /// Technically, this operation strips all transparent marks from ident's syntactic context.
75 pub fn modern_and_legacy(self) -> Ident {
76 Ident::new(self.name, self.span.modern_and_legacy())
79 pub fn gensym(self) -> Ident {
80 Ident::new(self.name.gensymed(), self.span)
83 pub fn as_str(self) -> LocalInternedString {
87 pub fn as_interned_str(self) -> InternedString {
88 self.name.as_interned_str()
92 impl PartialEq for Ident {
93 fn eq(&self, rhs: &Self) -> bool {
94 self.name == rhs.name && self.span.ctxt() == rhs.span.ctxt()
99 fn hash<H: Hasher>(&self, state: &mut H) {
100 self.name.hash(state);
101 self.span.ctxt().hash(state);
105 impl fmt::Debug for Ident {
106 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
107 write!(f, "{}{:?}", self.name, self.span.ctxt())
111 impl fmt::Display for Ident {
112 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113 fmt::Display::fmt(&self.name, f)
117 impl Encodable for Ident {
118 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
119 if self.span.ctxt().modern() == SyntaxContext::empty() {
120 s.emit_str(&self.as_str())
121 } else { // FIXME(jseyfried) intercrate hygiene
122 let mut string = "#".to_owned();
123 string.push_str(&self.as_str());
129 impl Decodable for Ident {
130 fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> {
131 let string = d.read_str()?;
132 Ok(if !string.starts_with('#') {
133 Ident::from_str(&string)
134 } else { // FIXME(jseyfried) intercrate hygiene
135 Ident::with_empty_ctxt(Symbol::gensym(&string[1..]))
/// An interned or gensymed string, represented by a `u32` tag.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Symbol(u32);
144 // The interner is pointed to by a thread local value which is only set on the main thread
145 // with parallelization is disabled. So we don't allow Symbol to transfer between threads
146 // to avoid panics and other errors, even though it would be memory safe to do so.
147 #[cfg(not(parallel_queries))]
148 impl !Send for Symbol { }
149 #[cfg(not(parallel_queries))]
150 impl !Sync for Symbol { }
153 /// Maps a string to its interned representation.
154 pub fn intern(string: &str) -> Self {
155 with_interner(|interner| interner.intern(string))
158 pub fn interned(self) -> Self {
159 with_interner(|interner| interner.interned(self))
162 /// gensym's a new usize, using the current interner.
163 pub fn gensym(string: &str) -> Self {
164 with_interner(|interner| interner.gensym(string))
167 pub fn gensymed(self) -> Self {
168 with_interner(|interner| interner.gensymed(self))
171 pub fn as_str(self) -> LocalInternedString {
172 with_interner(|interner| unsafe {
173 LocalInternedString {
174 string: ::std::mem::transmute::<&str, &str>(interner.get(self))
179 pub fn as_interned_str(self) -> InternedString {
180 with_interner(|interner| InternedString {
181 symbol: interner.interned(self)
185 pub fn as_u32(self) -> u32 {
190 impl fmt::Debug for Symbol {
191 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
192 let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
194 write!(f, "{}({})", self, self.0)
196 write!(f, "{}", self)
201 impl fmt::Display for Symbol {
202 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
203 fmt::Display::fmt(&self.as_str(), f)
207 impl Encodable for Symbol {
208 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
209 s.emit_str(&self.as_str())
213 impl Decodable for Symbol {
214 fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> {
215 Ok(Symbol::intern(&d.read_str()?))
219 impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
220 fn eq(&self, other: &T) -> bool {
221 self.as_str() == other.deref()
225 // The &'static strs in this type actually point into the arena
227 pub struct Interner {
228 arena: DroplessArena,
229 names: FxHashMap<&'static str, Symbol>,
230 strings: Vec<&'static str>,
231 gensyms: Vec<Symbol>,
235 fn prefill(init: &[&str]) -> Self {
236 let mut this = Interner::default();
237 for &string in init {
239 // We can't allocate empty strings in the arena, so handle this here
240 let name = Symbol(this.strings.len() as u32);
241 this.names.insert("", name);
242 this.strings.push("");
250 pub fn intern(&mut self, string: &str) -> Symbol {
251 if let Some(&name) = self.names.get(string) {
255 let name = Symbol(self.strings.len() as u32);
257 // from_utf8_unchecked is safe since we just allocated a &str which is known to be utf8
258 let string: &str = unsafe {
259 str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes()))
261 // It is safe to extend the arena allocation to 'static because we only access
262 // these while the arena is still alive
263 let string: &'static str = unsafe {
264 &*(string as *const str)
266 self.strings.push(string);
267 self.names.insert(string, name);
271 pub fn interned(&self, symbol: Symbol) -> Symbol {
272 if (symbol.0 as usize) < self.strings.len() {
275 self.interned(self.gensyms[(!0 - symbol.0) as usize])
279 fn gensym(&mut self, string: &str) -> Symbol {
280 let symbol = self.intern(string);
281 self.gensymed(symbol)
284 fn gensymed(&mut self, symbol: Symbol) -> Symbol {
285 self.gensyms.push(symbol);
286 Symbol(!0 - self.gensyms.len() as u32 + 1)
289 fn is_gensymed(&mut self, symbol: Symbol) -> bool {
290 symbol.0 as usize >= self.strings.len()
293 pub fn get(&self, symbol: Symbol) -> &str {
294 match self.strings.get(symbol.0 as usize) {
295 Some(string) => string,
296 None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
301 // In this macro, there is the requirement that the name (the number) must be monotonically
302 // increasing by one in the special identifiers, starting at 0; the same holds for the keywords,
303 // except starting from the next number instead of zero.
304 macro_rules! declare_keywords {(
305 $( ($index: expr, $konst: ident, $string: expr) )*
308 use super::{Symbol, Ident};
309 #[derive(Clone, Copy, PartialEq, Eq)]
314 #[inline] pub fn ident(self) -> Ident { self.ident }
315 #[inline] pub fn name(self) -> Symbol { self.ident.name }
318 #[allow(non_upper_case_globals)]
319 pub const $konst: Keyword = Keyword {
320 ident: Ident::with_empty_ctxt(super::Symbol($index))
324 impl ::std::str::FromStr for Keyword {
327 fn from_str(s: &str) -> Result<Self, ()> {
329 $($string => Ok($konst),)*
337 pub fn fresh() -> Self {
338 Interner::prefill(&[$($string,)*])
343 // NB: leaving holes in the ident table is bad! a different ident will get
344 // interned with the id from the hole, but it will be between the min and max
345 // of the reserved words, and thus tagged as "reserved".
346 // After modifying this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`;
347 // this should rarely be necessary, though, if the keywords are kept in alphabetical order.
349 // Special reserved identifiers used internally for elided lifetimes,
350 // unnamed method parameters, crate root module, error recovery etc.
352 (1, CrateRoot, "{{root}}")
353 (2, DollarCrate, "$crate")
356 // Keywords used in the language.
361 (8, Continue, "continue")
365 (12, Extern, "extern")
380 (27, Return, "return")
381 (28, SelfValue, "self")
382 (29, SelfType, "Self")
383 (30, Static, "static")
384 (31, Struct, "struct")
389 (36, Unsafe, "unsafe")
394 // Keywords reserved for future use.
395 (40, Abstract, "abstract")
396 (41, Become, "become")
400 (45, Override, "override")
402 (47, Typeof, "typeof")
403 (48, Unsized, "unsized")
404 (49, Virtual, "virtual")
407 // Edition-specific keywords reserved for future use.
408 (51, Async, "async") // >= 2018 Edition only
409 (52, Dyn, "dyn") // >= 2018 Edition only
410 (53, Try, "try") // >= 2018 Edition only
412 // Special lifetime names
413 (54, UnderscoreLifetime, "'_")
414 (55, StaticLifetime, "'static")
416 // Weak keywords, have special meaning only in specific contexts.
419 (58, Default, "default")
421 (60, Existential, "existential")
425 fn is_unused_keyword_2018(self) -> bool {
426 self >= keywords::Async.name() && self <= keywords::Try.name()
431 // Returns true for reserved identifiers used internally for elided lifetimes,
432 // unnamed method parameters, crate root module, error recovery etc.
433 pub fn is_special(self) -> bool {
434 self.name <= keywords::Underscore.name()
437 /// Returns `true` if the token is a keyword used in the language.
438 pub fn is_used_keyword(self) -> bool {
439 self.name >= keywords::As.name() && self.name <= keywords::While.name()
442 /// Returns `true` if the token is a keyword reserved for possible future use.
443 pub fn is_unused_keyword(self) -> bool {
444 // Note: `span.edition()` is relatively expensive, don't call it unless necessary.
445 self.name >= keywords::Abstract.name() && self.name <= keywords::Yield.name() ||
446 self.name.is_unused_keyword_2018() && self.span.rust_2018()
449 /// Returns `true` if the token is either a special identifier or a keyword.
450 pub fn is_reserved(self) -> bool {
451 self.is_special() || self.is_used_keyword() || self.is_unused_keyword()
454 /// A keyword or reserved identifier that can be used as a path segment.
455 pub fn is_path_segment_keyword(self) -> bool {
456 self.name == keywords::Super.name() ||
457 self.name == keywords::SelfValue.name() ||
458 self.name == keywords::SelfType.name() ||
459 self.name == keywords::Extern.name() ||
460 self.name == keywords::Crate.name() ||
461 self.name == keywords::CrateRoot.name() ||
462 self.name == keywords::DollarCrate.name()
465 // We see this identifier in a normal identifier position, like variable name or a type.
466 // How was it written originally? Did it use the raw form? Let's try to guess.
467 pub fn is_raw_guess(self) -> bool {
468 self.name != keywords::Invalid.name() &&
469 self.is_reserved() && !self.is_path_segment_keyword()
473 // If an interner exists, return it. Otherwise, prepare a fresh one.
475 fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
476 GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
479 /// Represents a string stored in the interner. Because the interner outlives any thread
480 /// which uses this type, we can safely treat `string` which points to interner data,
481 /// as an immortal string, as long as this type never crosses between threads.
482 // FIXME: Ensure that the interner outlives any thread which uses LocalInternedString,
483 // by creating a new thread right after constructing the interner
484 #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
485 pub struct LocalInternedString {
486 string: &'static str,
489 impl LocalInternedString {
490 pub fn as_interned_str(self) -> InternedString {
492 symbol: Symbol::intern(self.string)
497 impl<U: ?Sized> ::std::convert::AsRef<U> for LocalInternedString
499 str: ::std::convert::AsRef<U>
501 fn as_ref(&self) -> &U {
506 impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for LocalInternedString {
507 fn eq(&self, other: &T) -> bool {
508 self.string == other.deref()
512 impl ::std::cmp::PartialEq<LocalInternedString> for str {
513 fn eq(&self, other: &LocalInternedString) -> bool {
518 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a str {
519 fn eq(&self, other: &LocalInternedString) -> bool {
520 *self == other.string
524 impl ::std::cmp::PartialEq<LocalInternedString> for String {
525 fn eq(&self, other: &LocalInternedString) -> bool {
530 impl<'a> ::std::cmp::PartialEq<LocalInternedString> for &'a String {
531 fn eq(&self, other: &LocalInternedString) -> bool {
532 *self == other.string
536 impl !Send for LocalInternedString {}
537 impl !Sync for LocalInternedString {}
539 impl ::std::ops::Deref for LocalInternedString {
541 fn deref(&self) -> &str { self.string }
544 impl fmt::Debug for LocalInternedString {
545 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
546 fmt::Debug::fmt(self.string, f)
550 impl fmt::Display for LocalInternedString {
551 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
552 fmt::Display::fmt(self.string, f)
556 impl Decodable for LocalInternedString {
557 fn decode<D: Decoder>(d: &mut D) -> Result<LocalInternedString, D::Error> {
558 Ok(Symbol::intern(&d.read_str()?).as_str())
562 impl Encodable for LocalInternedString {
563 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
564 s.emit_str(self.string)
568 /// Represents a string stored in the string interner
569 #[derive(Clone, Copy, Eq)]
570 pub struct InternedString {
574 impl InternedString {
575 pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
576 let str = with_interner(|interner| {
577 interner.get(self.symbol) as *const str
579 // This is safe because the interner keeps string alive until it is dropped.
580 // We can access it because we know the interner is still alive since we use a
581 // scoped thread local to access it, and it was alive at the beginning of this scope
585 pub fn as_symbol(self) -> Symbol {
589 pub fn as_str(self) -> LocalInternedString {
594 impl Hash for InternedString {
595 fn hash<H: Hasher>(&self, state: &mut H) {
596 self.with(|str| str.hash(state))
600 impl PartialOrd<InternedString> for InternedString {
601 fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
602 if self.symbol == other.symbol {
603 return Some(Ordering::Equal);
605 self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str)))
609 impl Ord for InternedString {
610 fn cmp(&self, other: &InternedString) -> Ordering {
611 if self.symbol == other.symbol {
612 return Ordering::Equal;
614 self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str)))
618 impl<T: ::std::ops::Deref<Target = str>> PartialEq<T> for InternedString {
619 fn eq(&self, other: &T) -> bool {
620 self.with(|string| string == other.deref())
624 impl PartialEq<InternedString> for InternedString {
625 fn eq(&self, other: &InternedString) -> bool {
626 self.symbol == other.symbol
630 impl PartialEq<InternedString> for str {
631 fn eq(&self, other: &InternedString) -> bool {
632 other.with(|string| self == string)
636 impl<'a> PartialEq<InternedString> for &'a str {
637 fn eq(&self, other: &InternedString) -> bool {
638 other.with(|string| *self == string)
642 impl PartialEq<InternedString> for String {
643 fn eq(&self, other: &InternedString) -> bool {
644 other.with(|string| self == string)
648 impl<'a> PartialEq<InternedString> for &'a String {
649 fn eq(&self, other: &InternedString) -> bool {
650 other.with(|string| *self == string)
654 impl ::std::convert::From<InternedString> for String {
655 fn from(val: InternedString) -> String {
656 val.as_symbol().to_string()
660 impl fmt::Debug for InternedString {
661 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
662 self.with(|str| fmt::Debug::fmt(&str, f))
666 impl fmt::Display for InternedString {
667 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
668 self.with(|str| fmt::Display::fmt(&str, f))
672 impl Decodable for InternedString {
673 fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> {
674 Ok(Symbol::intern(&d.read_str()?).as_interned_str())
678 impl Encodable for InternedString {
679 fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
680 self.with(|string| s.emit_str(string))
690 fn interner_tests() {
691 let mut i: Interner = Interner::default();
692 // first one is zero:
693 assert_eq!(i.intern("dog"), Symbol(0));
694 // re-use gets the same entry:
695 assert_eq!(i.intern("dog"), Symbol(0));
696 // different string gets a different #:
697 assert_eq!(i.intern("cat"), Symbol(1));
698 assert_eq!(i.intern("cat"), Symbol(1));
699 // dog is still at zero
700 assert_eq!(i.intern("dog"), Symbol(0));
701 assert_eq!(i.gensym("zebra"), Symbol(4294967295));
702 // gensym of same string gets new number :
703 assert_eq!(i.gensym("zebra"), Symbol(4294967294));
704 // gensym of *existing* string gets new number:
705 assert_eq!(i.gensym("dog"), Symbol(4294967293));
709 fn without_first_quote_test() {
710 GLOBALS.set(&Globals::new(), || {
711 let i = Ident::from_str("'break");
712 assert_eq!(i.without_first_quote().name, keywords::Break.name());