1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // An "interner" is a data structure that associates values with uint tags and
12 // allows bidirectional lookup; i.e. given a value, one can easily find the
13 // tag, and vice versa.
17 use collections::HashMap;
19 use std::cell::RefCell;
/// Generic interner state: a value -> `Name` hash map plus a vector of the
/// stored values. Both fields are wrapped in `RefCell` so the methods can
/// mutate them through a shared `&self`.
25 pub struct Interner<T> {
// Forward lookup: value -> Name.
26 priv map: RefCell<HashMap<T, Name>>,
// Reverse lookup: a Name, cast to uint, is used as an index into this vector.
27 priv vect: RefCell<Vec<T> >,
30 // when traits can extend traits, we should extend index<Name,T> to get []
/// Operations on the generic interner. Values are used as hash-map keys and
/// handed back by clone, hence the `TotalEq + Hash + Clone` bounds.
31 impl<T: TotalEq + Hash + Clone + 'static> Interner<T> {
/// Creates an interner whose map and vector are both empty.
32 pub fn new() -> Interner<T> {
34 map: RefCell::new(HashMap::new()),
35 vect: RefCell::new(Vec::new()),
/// Creates an interner and interns a clone of each element of `init`,
/// in slice order.
39 pub fn prefill(init: &[T]) -> Interner<T> {
40 let rv = Interner::new();
41 for v in init.iter() {
42 rv.intern((*v).clone());
/// Returns the `Name` associated with `val`, registering `val` first if the
/// map does not contain it yet.
47 pub fn intern(&self, val: T) -> Name {
48 let mut map = self.map.borrow_mut();
// Fast path: `val` is already known, so return its existing index.
49 match (*map).find(&val) {
50 Some(&idx) => return idx,
// Slow path: the current vector length becomes the new Name, and a clone
// of the value is recorded in the map under it.
54 let mut vect = self.vect.borrow_mut();
55 let new_idx = (*vect).len() as Name;
56 (*map).insert(val.clone(), new_idx);
/// Allocates a fresh `Name` for `val` based on the current vector length.
/// Per the comment below, the value is intentionally not entered in the
/// map, so it cannot collide with an interned entry.
61 pub fn gensym(&self, val: T) -> Name {
62 let mut vect = self.vect.borrow_mut();
63 let new_idx = (*vect).len() as Name;
64 // leave out of .map to avoid colliding
/// Returns a clone of the value stored at index `idx` of the vector.
69 pub fn get(&self, idx: Name) -> T {
70 let vect = self.vect.borrow();
71 (*(*vect).get(idx as uint)).clone()
/// Reports a size as `uint`; the body borrows the vector.
/// NOTE(review): presumably returns the vector's length -- confirm.
74 pub fn len(&self) -> uint {
75 let vect = self.vect.borrow();
/// Looks `val` up in the map through `find_equiv`, so the key may be any
/// hashable type `Q` that is `Equiv<T>` rather than a `T` itself.
79 pub fn find_equiv<Q:Hash + Equiv<T>>(&self, val: &Q) -> Option<Name> {
80 let map = self.map.borrow();
81 match (*map).find_equiv(val) {
// Reset: replace both containers with fresh, empty ones.
88 *self.map.borrow_mut() = HashMap::new();
89 *self.vect.borrow_mut() = Vec::new();
// String type whose storage is a reference-counted owned string.
93 #[deriving(Clone, Eq, Hash, Ord)]
// The shared string data.
95 priv string: Rc<~str>,
/// Total equality for `RcStr`, defined by comparing the underlying string
/// slices.
98 impl TotalEq for RcStr {
99 fn equals(&self, other: &RcStr) -> bool {
100 self.as_slice().equals(&other.as_slice())
/// Total ordering for `RcStr`, defined by comparing the underlying string
/// slices.
104 impl TotalOrd for RcStr {
105 fn cmp(&self, other: &RcStr) -> Ordering {
106 self.as_slice().cmp(&other.as_slice())
/// Borrows the wrapped string as a `&str` that lives as long as `self`.
112 fn as_slice<'a>(&'a self) -> &'a str {
// Dereference the Rc to reach the owned string, then view it as a slice.
113 let s: &'a str = *self.string;
/// Consumes the `RcStr` and returns an owned `~str` obtained by calling
/// `to_owned()` on the wrapped string.
118 fn into_owned(self) -> ~str {
119 self.string.to_owned()
/// Formats an `RcStr` by delegating to the `fmt` implementation of its
/// string slice.
123 impl fmt::Show for RcStr {
124 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 self.as_slice().fmt(f)
/// Builds an `RcStr` from a borrowed string: the text is copied into an
/// owned string, which is then placed behind an `Rc`.
131 pub fn new(string: &str) -> RcStr {
133 string: Rc::new(string.to_owned()),
138 // A StrInterner differs from Interner<String> in that it accepts
139 // &str rather than RcStr, resulting in less allocation.
// Both fields are wrapped in RefCell so the methods can mutate them through
// a shared `&self`.
140 pub struct StrInterner {
// Forward lookup: string -> Name.
141 priv map: RefCell<HashMap<RcStr, Name>>,
// Reverse lookup: a Name, cast to uint, is used as an index into this vector.
142 priv vect: RefCell<Vec<RcStr> >,
145 // when traits can extend traits, we should extend index<Name,T> to get []
/// Creates a string interner whose map and vector are both empty.
147 pub fn new() -> StrInterner {
149 map: RefCell::new(HashMap::new()),
150 vect: RefCell::new(Vec::new()),
/// Creates a string interner and interns every string of `init`, in slice
/// order.
154 pub fn prefill(init: &[&str]) -> StrInterner {
155 let rv = StrInterner::new();
156 for &v in init.iter() { rv.intern(v); }
/// Returns the `Name` associated with `val`, registering the string first
/// if the map does not contain it yet.
160 pub fn intern(&self, val: &str) -> Name {
161 let mut map = self.map.borrow_mut();
// Probe with the borrowed &str via `find_equiv`, so a hit needs no RcStr.
162 match map.find_equiv(&val) {
163 Some(&idx) => return idx,
// Miss: the current length becomes the new Name. One RcStr is built; the
// map receives a clone of it and the vector receives the original.
167 let new_idx = self.len() as Name;
168 let val = RcStr::new(val);
169 map.insert(val.clone(), new_idx);
170 self.vect.borrow_mut().push(val);
/// Appends a new `RcStr` built from `val` to the vector without touching
/// the map. `new_idx` is the vector length before the push, i.e. the index
/// the new entry lands at.
174 pub fn gensym(&self, val: &str) -> Name {
175 let new_idx = self.len() as Name;
176 // leave out of .map to avoid colliding
177 self.vect.borrow_mut().push(RcStr::new(val));
181 // I want these gensyms to share name pointers
182 // with existing entries. This would be automatic,
183 // except that the existing gensym creates its
184 // own managed ptr using to_managed. I think that
185 // adding this utility function is the most
186 // lightweight way to get what I want, though not
187 // necessarily the cleanest.
189 // create a gensym with the same name as an existing entry.
191 pub fn gensym_copy(&self, idx : Name) -> Name {
// The length is read before the mutable borrow of the vector is taken.
192 let new_idx = self.len() as Name;
193 // leave out of map to avoid colliding
194 let mut vect = self.vect.borrow_mut();
// Clone the RcStr stored at `idx`; the clone is what the new entry reuses.
195 let existing = (*vect.get(idx as uint)).clone();
/// Returns a clone of the `RcStr` stored at index `idx` of the vector.
200 pub fn get(&self, idx: Name) -> RcStr {
201 (*self.vect.borrow().get(idx as uint)).clone()
204 /// Returns this string with lifetime tied to the interner. Since
205 /// strings may never be removed from the interner, this is safe.
206 pub fn get_ref<'a>(&'a self, idx: Name) -> &'a str {
207 let vect = self.vect.borrow();
// `s` borrows from the RefCell guard `vect`, not directly from `self`.
// NOTE(review): returning it as `&'a str` needs the lifetime widened past
// the guard -- confirm how the remainder of the body does that.
208 let s: &str = vect.get(idx as uint).as_slice();
/// Returns the number of entries in the vector (interned strings and
/// gensyms alike, since both are pushed onto it).
214 pub fn len(&self) -> uint {
215 self.vect.borrow().len()
/// Looks `val` up in the map through `find_equiv`, so the key may be any
/// hashable type `Q` that is `Equiv<RcStr>` rather than an `RcStr` itself.
218 pub fn find_equiv<Q:Hash + Equiv<RcStr>>(&self, val: &Q) -> Option<Name> {
219 match (*self.map.borrow()).find_equiv(val) {
/// Empties the interner by replacing both the map and the vector with
/// fresh, empty containers.
225 pub fn clear(&self) {
226 *self.map.borrow_mut() = HashMap::new();
227 *self.vect.borrow_mut() = Vec::new();
// Start from a fresh, empty interner.
237 let i : Interner<RcStr> = Interner::new();
// Checks the generic interner: `intern` hands out sequential indices and is
// idempotent per value, `gensym` always yields a new index, and `get`
// returns the value stored at each index.
242 fn interner_tests () {
243 let i : Interner<RcStr> = Interner::new();
244 // first one is zero:
245 assert_eq!(i.intern(RcStr::new("dog")), 0);
246 // re-use gets the same entry:
247 assert_eq!(i.intern(RcStr::new("dog")), 0);
248 // different string gets a different #:
249 assert_eq!(i.intern(RcStr::new("cat")), 1);
250 assert_eq!(i.intern(RcStr::new("cat")), 1);
251 // dog is still at zero
252 assert_eq!(i.intern(RcStr::new("dog")), 0);
254 assert_eq!(i.gensym(RcStr::new("zebra") ), 2);
255 // gensym of same string gets new number :
256 assert_eq!(i.gensym (RcStr::new("zebra") ), 3);
257 // gensym of *existing* string gets new number:
258 assert_eq!(i.gensym(RcStr::new("dog")), 4);
// every index, interned or gensym'd, maps back to the string it was given:
259 assert_eq!(i.get(0), RcStr::new("dog"));
260 assert_eq!(i.get(1), RcStr::new("cat"));
261 assert_eq!(i.get(2), RcStr::new("zebra"));
262 assert_eq!(i.get(3), RcStr::new("zebra"));
263 assert_eq!(i.get(4), RcStr::new("dog"));
// Prefilled entries are expected at consecutive indices starting from 0, and
// interning a string that was prefilled must return its existing index.
268 let i : Interner<RcStr> = Interner::prefill([
273 assert_eq!(i.get(0), RcStr::new("Alan"));
274 assert_eq!(i.get(1), RcStr::new("Bob"));
275 assert_eq!(i.get(2), RcStr::new("Carol"));
276 assert_eq!(i.intern(RcStr::new("Bob")), 1);
// Checks the string interner: `intern` hands out sequential indices and is
// idempotent per string, `gensym` and `gensym_copy` always yield a new index,
// and `get` returns the string stored at each index.
280 fn string_interner_tests() {
281 let i : StrInterner = StrInterner::new();
282 // first one is zero:
283 assert_eq!(i.intern("dog"), 0);
284 // re-use gets the same entry:
285 assert_eq!(i.intern ("dog"), 0);
286 // different string gets a different #:
287 assert_eq!(i.intern("cat"), 1);
288 assert_eq!(i.intern("cat"), 1);
289 // dog is still at zero
290 assert_eq!(i.intern("dog"), 0);
292 assert_eq!(i.gensym("zebra"), 2);
293 // gensym of same string gets new number :
294 assert_eq!(i.gensym("zebra"), 3);
295 // gensym of *existing* string gets new number:
296 assert_eq!(i.gensym("dog"), 4);
297 // gensym tests again with gensym_copy:
298 assert_eq!(i.gensym_copy(2), 5);
299 assert_eq!(i.get(5), RcStr::new("zebra"));
300 assert_eq!(i.gensym_copy(2), 6);
301 assert_eq!(i.get(6), RcStr::new("zebra"));
// the earlier indices still map back to the strings they were given:
302 assert_eq!(i.get(0), RcStr::new("dog"));
303 assert_eq!(i.get(1), RcStr::new("cat"));
304 assert_eq!(i.get(2), RcStr::new("zebra"));
305 assert_eq!(i.get(3), RcStr::new("zebra"));
306 assert_eq!(i.get(4), RcStr::new("dog"));