1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with usize tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
17 use std::borrow::BorrowFrom;
18 use std::cell::RefCell;
19 use std::cmp::Ordering;
20 use std::collections::HashMap;
23 use std::collections::hash_map::Hasher;
/// Generic interner that associates values of type `T` with `Name` tags.
/// Both containers sit behind `RefCell`, so the methods work through `&self`.
27 pub struct Interner<T> {
// Value -> `Name` table consulted by `intern` and `find`.
28 map: RefCell<HashMap<T, Name>>,
// Stored values; `get` indexes this vector with the `Name`'s index.
29 vect: RefCell<Vec<T> >,
32 // when traits can extend traits, we should extend index<Name,T> to get []
33 impl<T: Eq + Hash<Hasher> + Clone + 'static> Interner<T> {
/// Creates an interner whose map and vector both start out empty.
34 pub fn new() -> Interner<T> {
36 map: RefCell::new(HashMap::new()),
37 vect: RefCell::new(Vec::new()),
/// Creates an interner and feeds it clones of values taken from `init`
/// (presumably every element, in order; the loop header is not visible here).
41 pub fn prefill(init: &[T]) -> Interner<T> {
42 let rv = Interner::new();
44 rv.intern((*v).clone());
/// Returns the `Name` for `val`. A value already present in the map gets its
/// existing `Name` back; otherwise a new one is derived from the vector length.
49 pub fn intern(&self, val: T) -> Name {
50 let mut map = self.map.borrow_mut();
51 match (*map).get(&val) {
// Hit: reuse the tag recorded earlier.
52 Some(&idx) => return idx,
56 let mut vect = self.vect.borrow_mut();
// Next free index; note that the `as u32` cast narrows the usize length.
57 let new_idx = Name((*vect).len() as u32);
58 (*map).insert(val.clone(), new_idx);
/// Hands out a `Name` based on the current vector length for `val`,
/// regardless of whether an equal value has been interned before.
63 pub fn gensym(&self, val: T) -> Name {
64 let mut vect = self.vect.borrow_mut();
65 let new_idx = Name((*vect).len() as u32);
66 // leave out of .map to avoid colliding
/// Returns a clone of the value stored at position `idx`. Indexing the
/// vector panics when `idx` is out of range.
71 pub fn get(&self, idx: Name) -> T {
72 let vect = self.vect.borrow();
73 (*vect)[idx.usize()].clone()
/// Size query answered from a shared borrow of `vect`.
76 pub fn len(&self) -> usize {
77 let vect = self.vect.borrow();
/// Looks `val` up in the map through a shared borrow, so nothing is inserted.
/// `Q` is any type that can be borrowed from `T`.
81 pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
82 where Q: BorrowFrom<T> + Eq + Hash<Hasher> {
83 let map = self.map.borrow();
84 match (*map).get(val) {
// Drop all contents by swapping in a fresh map and a fresh vector.
91 *self.map.borrow_mut() = HashMap::new();
92 *self.vect.borrow_mut() = Vec::new();
// NOTE(review): this derive presumably applies to the `RcStr` struct whose
// declaration lines are not visible in this view -- confirm.
96 #[derive(Clone, PartialEq, Hash, PartialOrd)]
/// Builds an `RcStr` by copying `string` into an owned `String` held in an `Rc`.
102 pub fn new(string: &str) -> RcStr {
104 string: Rc::new(string.to_string()),
// Compares two `RcStr` values and yields an `Ordering`.
112 fn cmp(&self, other: &RcStr) -> Ordering {
117 impl fmt::Debug for RcStr {
118 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
124 impl fmt::Display for RcStr {
125 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 use std::fmt::Display;
// Lets an `RcStr` be borrowed as `str`; `StrInterner::find` requires
// `BorrowFrom<RcStr>` of its key type.
131 impl BorrowFrom<RcStr> for str {
132 fn borrow_from(owned: &RcStr) -> &str {
// Dereferencing exposes the inner string as `&str` (`[]` presumably being the
// full-range slice in this dialect -- confirm).
137 impl Deref for RcStr {
140 fn deref(&self) -> &str { &self.string[] }
143 /// A StrInterner differs from Interner<RcStr> in that it accepts
144 /// &str rather than RcStr, resulting in less allocation.
145 pub struct StrInterner {
// String -> `Name` table consulted by `intern` and `find`.
146 map: RefCell<HashMap<RcStr, Name>>,
// Stored strings; `get` indexes this vector with the `Name`'s index.
147 vect: RefCell<Vec<RcStr> >,
150 /// When traits can extend traits, we should extend index<Name,T> to get []
/// Creates a `StrInterner` with an empty map and an empty vector.
152 pub fn new() -> StrInterner {
154 map: RefCell::new(HashMap::new()),
155 vect: RefCell::new(Vec::new()),
/// Creates a `StrInterner` and interns every string of `init` in slice order.
159 pub fn prefill(init: &[&str]) -> StrInterner {
160 let rv = StrInterner::new();
// The `Name` returned by each `intern` call is not needed here.
161 for &v in init { rv.intern(v); }
/// Interns `val` and returns its `Name`. When the map already has an entry,
/// the stored `Name` is returned before any `RcStr` is allocated.
165 pub fn intern(&self, val: &str) -> Name {
166 let mut map = self.map.borrow_mut();
// Hit: return the existing tag.
168 Some(&idx) => return idx,
// Miss: the new tag is the current number of stored strings (cast to u32).
172 let new_idx = Name(self.len() as u32);
173 let val = RcStr::new(val);
// A clone goes into the map as the key; the original goes into the vector.
174 map.insert(val.clone(), new_idx);
175 self.vect.borrow_mut().push(val);
/// Stores a freshly allocated copy of `val` under a new `Name` without
/// consulting or updating the map, so equal text can get several names.
179 pub fn gensym(&self, val: &str) -> Name {
// The new tag is the current number of stored strings (cast to u32).
180 let new_idx = Name(self.len() as u32);
181 // leave out of .map to avoid colliding
182 self.vect.borrow_mut().push(RcStr::new(val));
186 // I want these gensyms to share name pointers
187 // with existing entries. This would be automatic,
188 // except that the existing gensym creates its
189 // own RcStr using RcStr::new. I think that
190 // adding this utility function is the most
191 // lightweight way to get what I want, though not
192 // necessarily the cleanest.
194 /// Create a gensym with the same name as an existing
196 pub fn gensym_copy(&self, idx : Name) -> Name {
// Computed before the mutable borrow below, because `len` borrows `vect` too.
197 let new_idx = Name(self.len() as u32);
198 // leave out of map to avoid colliding
199 let mut vect = self.vect.borrow_mut();
// Clone the entry at `idx`; indexing panics when `idx` is out of range.
200 let existing = (*vect)[idx.usize()].clone();
/// Returns a clone of the `RcStr` stored at position `idx`. Indexing the
/// vector panics when `idx` is out of range.
205 pub fn get(&self, idx: Name) -> RcStr {
206 (*self.vect.borrow())[idx.usize()].clone()
/// Returns the number of entries in `vect`, which includes gensym'd entries
/// since they are pushed onto the same vector.
209 pub fn len(&self) -> usize {
210 self.vect.borrow().len()
/// Looks `val` up in the map through a shared borrow, so nothing is inserted.
/// `Q` is any type that can be borrowed from `RcStr`, such as `str`.
213 pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
214 where Q: BorrowFrom<RcStr> + Eq + Hash<Hasher> {
215 match (*self.map.borrow()).get(val) {
/// Empties the interner by replacing the map and the vector with new, empty
/// containers.
221 pub fn clear(&self) {
222 *self.map.borrow_mut() = HashMap::new();
223 *self.vect.borrow_mut() = Vec::new();
/// Replaces this interner's contents with those of `other`, which is consumed:
/// its map and vector are moved out of their `RefCell`s and into `self`.
226 pub fn reset(&self, other: StrInterner) {
227 *self.map.borrow_mut() = other.map.into_inner();
228 *self.vect.borrow_mut() = other.vect.into_inner();
240 let i : Interner<RcStr> = Interner::new();
// Exercises `intern`, `gensym` and `get` on an `Interner<RcStr>`.
245 fn interner_tests () {
246 let i : Interner<RcStr> = Interner::new();
247 // first one is zero:
248 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
249 // re-use gets the same entry:
250 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
251 // different string gets a different #:
252 assert_eq!(i.intern(RcStr::new("cat")), Name(1));
253 assert_eq!(i.intern(RcStr::new("cat")), Name(1));
254 // dog is still at zero
255 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
// gensym hands out the next unused number:
257 assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2));
258 // gensym of same string gets new number :
259 assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3));
260 // gensym of *existing* string gets new number:
261 assert_eq!(i.gensym(RcStr::new("dog")), Name(4));
// every tag, interned or gensym'd, resolves back to its string:
262 assert_eq!(i.get(Name(0)), RcStr::new("dog"));
263 assert_eq!(i.get(Name(1)), RcStr::new("cat"));
264 assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
265 assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
266 assert_eq!(i.get(Name(4)), RcStr::new("dog"));
271 let i : Interner<RcStr> = Interner::prefill(&[
276 assert_eq!(i.get(Name(0)), RcStr::new("Alan"));
277 assert_eq!(i.get(Name(1)), RcStr::new("Bob"));
278 assert_eq!(i.get(Name(2)), RcStr::new("Carol"));
279 assert_eq!(i.intern(RcStr::new("Bob")), Name(1));
// Exercises `intern`, `gensym`, `gensym_copy` and `get` on a `StrInterner`.
283 fn string_interner_tests() {
284 let i : StrInterner = StrInterner::new();
285 // first one is zero:
286 assert_eq!(i.intern("dog"), Name(0));
287 // re-use gets the same entry:
288 assert_eq!(i.intern ("dog"), Name(0));
289 // different string gets a different #:
290 assert_eq!(i.intern("cat"), Name(1));
291 assert_eq!(i.intern("cat"), Name(1));
292 // dog is still at zero
293 assert_eq!(i.intern("dog"), Name(0));
// gensym hands out the next unused number:
295 assert_eq!(i.gensym("zebra"), Name(2));
296 // gensym of same string gets new number :
297 assert_eq!(i.gensym("zebra"), Name(3));
298 // gensym of *existing* string gets new number:
299 assert_eq!(i.gensym("dog"), Name(4));
300 // gensym tests again with gensym_copy:
301 assert_eq!(i.gensym_copy(Name(2)), Name(5));
302 assert_eq!(i.get(Name(5)), RcStr::new("zebra"));
303 assert_eq!(i.gensym_copy(Name(2)), Name(6));
304 assert_eq!(i.get(Name(6)), RcStr::new("zebra"));
// the earlier tags still resolve to their original strings:
305 assert_eq!(i.get(Name(0)), RcStr::new("dog"));
306 assert_eq!(i.get(Name(1)), RcStr::new("cat"));
307 assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
308 assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
309 assert_eq!(i.get(Name(4)), RcStr::new("dog"));