1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! An "interner" is a data structure that associates values with uint tags and
12 //! allows bidirectional lookup; i.e. given a value, one can easily find the
13 //! tag, and vice versa.
17 use std::borrow::BorrowFrom;
18 use std::cell::RefCell;
19 use std::cmp::Ordering;
20 use std::collections::HashMap;
/// Interning table that pairs values of type `T` with `Name` tags.
/// Both tables sit behind `RefCell`, which is what lets the `&self`
/// methods of this type update them.
26 pub struct Interner<T> {
// value -> tag lookup; `intern` and `find` consult it
27 map: RefCell<HashMap<T, Name>>,
// tag -> value storage; `get` indexes it with `Name::uint()`
28 vect: RefCell<Vec<T> >,
31 // when traits can extend traits, we should extend index<Name,T> to get .index(&FullRange)
32 impl<T: Eq + Hash + Clone + 'static> Interner<T> {
/// Builds an interner whose map and vector both start out empty.
33 pub fn new() -> Interner<T> {
35 map: RefCell::new(HashMap::new()),
36 vect: RefCell::new(Vec::new()),
/// Builds a new interner and interns a clone of every element of `init`,
/// in slice order.
40 pub fn prefill(init: &[T]) -> Interner<T> {
41 let rv = Interner::new();
42 for v in init.iter() {
43 rv.intern((*v).clone());
/// Returns the tag for `val`. A value already present in the map gets the
/// tag recorded for it; otherwise a new tag is made from the current
/// vector length and recorded in the map.
48 pub fn intern(&self, val: T) -> Name {
49 let mut map = self.map.borrow_mut();
50 match (*map).get(&val) {
// already interned: return the stored tag right away
51 Some(&idx) => return idx,
55 let mut vect = self.vect.borrow_mut();
// the current length becomes the new tag (narrowed to u32 with `as`)
56 let new_idx = Name((*vect).len() as u32);
// the map receives a clone; NOTE(review): `val` itself is presumably stored
// in `vect` on a line not visible in this extract -- confirm
57 (*map).insert(val.clone(), new_idx);
/// Creates a new tag from the current vector length without looking at or
/// updating the map, so the result never coincides with an interned tag
/// for an equal value.
62 pub fn gensym(&self, val: T) -> Name {
63 let mut vect = self.vect.borrow_mut();
64 let new_idx = Name((*vect).len() as u32);
65 // leave out of .map to avoid colliding
/// Returns a clone of the value stored at position `idx.uint()`.
/// The lookup is a plain index expression, so an out-of-range `idx` panics.
70 pub fn get(&self, idx: Name) -> T {
71 let vect = self.vect.borrow();
72 (*vect)[idx.uint()].clone()
/// Size query over the vector. NOTE(review): the returning expression is
/// not visible in this extract; presumably the vector length -- confirm.
75 pub fn len(&self) -> uint {
76 let vect = self.vect.borrow();
/// Looks `val` up in the map using only a shared borrow. `Q` may be any
/// (possibly unsized) type that satisfies `BorrowFrom<T>`, `Eq` and `Hash`.
/// NOTE(review): the match arms are not visible in this extract.
80 pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
81 where Q: BorrowFrom<T> + Eq + Hash {
82 let map = self.map.borrow();
83 match (*map).get(val) {
// NOTE(review): the signature these two statements belong to is not visible
// in this extract. They replace both tables with fresh, empty ones.
90 *self.map.borrow_mut() = HashMap::new();
91 *self.vect.borrow_mut() = Vec::new();
// NOTE(review): the item this derive is attached to is not visible in this
// extract; presumably the `RcStr` struct -- confirm.
95 #[derive(Clone, PartialEq, Hash, PartialOrd)]
/// Builds an `RcStr` from a string slice: the text is copied into a new
/// `String`, which is then placed in an `Rc`.
101 pub fn new(string: &str) -> RcStr {
103 string: Rc::new(string.to_string()),
/// Orders two `RcStr` values by comparing what `index(&FullRange)` yields
/// for each of them (presumably the full string contents -- confirm).
111 fn cmp(&self, other: &RcStr) -> Ordering {
112 self.index(&FullRange).cmp(other.index(&FullRange))
/// Formatting for `RcStr`: delegates to the `fmt` method of the value
/// returned by `index(&FullRange)`.
116 impl fmt::Show for RcStr {
117 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119 self.index(&FullRange).fmt(f)
/// Allows a `&str` to be borrowed from an `RcStr`. This satisfies the
/// `Q: BorrowFrom<RcStr>` bound on `StrInterner::find` for `Q = str`.
123 impl BorrowFrom<RcStr> for str {
124 fn borrow_from(owned: &RcStr) -> &str {
// view of the whole inner string
125 owned.string.index(&FullRange)
/// Dereferences an `RcStr` to a `str` taken from its `string` field.
129 impl Deref for RcStr {
132 fn deref(&self) -> &str { self.string.index(&FullRange) }
/// String-specialised interner: strings are stored as `RcStr` and tagged
/// with `Name`. Both tables sit behind `RefCell` so that `&self` methods
/// can update them.
///
135 /// A StrInterner differs from Interner<String> in that it accepts
136 /// &str rather than RcStr, resulting in less allocation.
137 pub struct StrInterner {
// string -> tag lookup, keyed by `RcStr`
138 map: RefCell<HashMap<RcStr, Name>>,
// tag -> string storage; `get` indexes it with `Name::uint()`
139 vect: RefCell<Vec<RcStr> >,
142 /// When traits can extend traits, we should extend index<Name,T> to get .index(&FullRange)
/// Builds a `StrInterner` whose map and vector both start out empty.
144 pub fn new() -> StrInterner {
146 map: RefCell::new(HashMap::new()),
147 vect: RefCell::new(Vec::new()),
/// Builds a new `StrInterner` and interns every string of `init`, in slice
/// order. The tags returned by the individual `intern` calls are discarded.
151 pub fn prefill(init: &[&str]) -> StrInterner {
152 let rv = StrInterner::new();
153 for &v in init.iter() { rv.intern(v); }
/// Returns the tag for `val`. If a tag is already recorded it is returned
/// unchanged; otherwise a new tag is made from the current length, and a
/// newly built `RcStr` is stored in both the map and the vector.
157 pub fn intern(&self, val: &str) -> Name {
158 let mut map = self.map.borrow_mut();
// NOTE(review): the `match` header for this arm is not visible in this
// extract; presumably a lookup of `val` in `map` -- confirm.
160 Some(&idx) => return idx,
// `len()` only takes a short-lived shared borrow of `vect`, so it does not
// clash with the `borrow_mut` further down
164 let new_idx = Name(self.len() as u32);
// shadows the `&str` parameter with an owned `RcStr`
165 let val = RcStr::new(val);
// the map gets a clone, the vector gets the original
166 map.insert(val.clone(), new_idx);
167 self.vect.borrow_mut().push(val);
/// Creates a new tag from the current length and appends a new `RcStr`
/// built from `val` to the vector. The map is neither read nor updated, so
/// the new tag is distinct from any tag `intern` gave to an equal string.
171 pub fn gensym(&self, val: &str) -> Name {
172 let new_idx = Name(self.len() as u32);
173 // leave out of .map to avoid colliding
174 self.vect.borrow_mut().push(RcStr::new(val));
178 // I want these gensyms to share name pointers
179 // with existing entries. This would be automatic,
180 // except that the existing gensym creates its
181 // own managed ptr using to_managed. I think that
182 // adding this utility function is the most
183 // lightweight way to get what I want, though not
184 // necessarily the cleanest.
// NOTE(review): `to_managed` does not appear in the visible code; the visible
// `gensym` allocates through `RcStr::new`, so the note above may be outdated.
186 /// Create a gensym with the same name as an existing
///
/// `idx` is the tag of an existing entry. The entry at `idx.uint()` is
/// cloned out of the vector, and the new tag is the vector length at the
/// time of the call. The lookup is a plain index expression, so an
/// out-of-range `idx` panics.
188 pub fn gensym_copy(&self, idx : Name) -> Name {
// computed before the mutable borrow below, because `len()` takes its own
// borrow of `vect`
189 let new_idx = Name(self.len() as u32);
190 // leave out of map to avoid colliding
191 let mut vect = self.vect.borrow_mut();
// clone of the stored `RcStr` for the entry being copied
192 let existing = (*vect)[idx.uint()].clone();
/// Returns a clone of the `RcStr` stored at position `idx.uint()`.
/// The lookup is a plain index expression, so an out-of-range `idx` panics.
197 pub fn get(&self, idx: Name) -> RcStr {
198 (*self.vect.borrow())[idx.uint()].clone()
/// Number of entries in the vector. `gensym` and `gensym_copy` push to the
/// vector without touching the map, so their entries are counted here too.
201 pub fn len(&self) -> uint {
202 self.vect.borrow().len()
/// Looks `val` up in the map using only a shared borrow. `Q` may be any
/// (possibly unsized) type that satisfies `BorrowFrom<RcStr>`, `Eq` and
/// `Hash`. NOTE(review): the match arms are not visible in this extract.
205 pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
206 where Q: BorrowFrom<RcStr> + Eq + Hash {
207 match (*self.map.borrow()).get(val) {
/// Empties the interner by replacing both the map and the vector with
/// newly created empty ones.
213 pub fn clear(&self) {
214 *self.map.borrow_mut() = HashMap::new();
215 *self.vect.borrow_mut() = Vec::new();
/// Replaces the contents of this interner with those of `other`.
/// `other` is taken by value and its map and vector are moved out of their
/// `RefCell`s with `into_inner`.
218 pub fn reset(&self, other: StrInterner) {
219 *self.map.borrow_mut() = other.map.into_inner();
220 *self.vect.borrow_mut() = other.vect.into_inner();
232 let i : Interner<RcStr> = Interner::new();
// Exercises `Interner<RcStr>`: tags from `intern` are stable per value,
// `gensym` always yields a new tag, and `get` returns the stored string for
// every tag handed out.
237 fn interner_tests () {
238 let i : Interner<RcStr> = Interner::new();
239 // first one is zero:
240 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
241 // re-use gets the same entry:
242 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
243 // different string gets a different #:
244 assert_eq!(i.intern(RcStr::new("cat")), Name(1));
245 assert_eq!(i.intern(RcStr::new("cat")), Name(1));
246 // dog is still at zero
247 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
// first gensym takes the next unused number
249 assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2));
250 // gensym of same string gets new number :
251 assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3));
252 // gensym of *existing* string gets new number:
253 assert_eq!(i.gensym(RcStr::new("dog")), Name(4));
// every tag, interned or gensym'd, maps back to its string
254 assert_eq!(i.get(Name(0)), RcStr::new("dog"));
255 assert_eq!(i.get(Name(1)), RcStr::new("cat"));
256 assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
257 assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
258 assert_eq!(i.get(Name(4)), RcStr::new("dog"));
// NOTE(review): the enclosing test's header and the elements passed to
// `prefill` are not visible in this extract. The assertions expect tags 0..2
// to map to "Alan", "Bob", "Carol", and a later `intern` of "Bob" to return
// tag 1 again.
263 let i : Interner<RcStr> = Interner::prefill(&[
268 assert_eq!(i.get(Name(0)), RcStr::new("Alan"));
269 assert_eq!(i.get(Name(1)), RcStr::new("Bob"));
270 assert_eq!(i.get(Name(2)), RcStr::new("Carol"));
271 assert_eq!(i.intern(RcStr::new("Bob")), Name(1));
// Exercises `StrInterner`: same checks as for `Interner<RcStr>`, plus
// `gensym_copy`, which must hand out a new tag whose string equals the
// copied entry's string.
275 fn string_interner_tests() {
276 let i : StrInterner = StrInterner::new();
277 // first one is zero:
278 assert_eq!(i.intern("dog"), Name(0));
279 // re-use gets the same entry:
280 assert_eq!(i.intern ("dog"), Name(0));
281 // different string gets a different #:
282 assert_eq!(i.intern("cat"), Name(1));
283 assert_eq!(i.intern("cat"), Name(1));
284 // dog is still at zero
285 assert_eq!(i.intern("dog"), Name(0));
// first gensym takes the next unused number
287 assert_eq!(i.gensym("zebra"), Name(2));
288 // gensym of same string gets new number :
289 assert_eq!(i.gensym("zebra"), Name(3));
290 // gensym of *existing* string gets new number:
291 assert_eq!(i.gensym("dog"), Name(4));
292 // gensym tests again with gensym_copy:
293 assert_eq!(i.gensym_copy(Name(2)), Name(5));
294 assert_eq!(i.get(Name(5)), RcStr::new("zebra"));
295 assert_eq!(i.gensym_copy(Name(2)), Name(6));
296 assert_eq!(i.get(Name(6)), RcStr::new("zebra"));
// the earlier tags still map to their original strings
297 assert_eq!(i.get(Name(0)), RcStr::new("dog"));
298 assert_eq!(i.get(Name(1)), RcStr::new("cat"));
299 assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
300 assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
301 assert_eq!(i.get(Name(4)), RcStr::new("dog"));