1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // An "interner" is a data structure that associates values with uint tags and
12 // allows bidirectional lookup; i.e. given a value, one can easily find the
13 // type, and vice versa.
18 use std::cell::RefCell;
20 use std::hashmap::HashMap;
23 pub struct Interner<T> {
24 priv map: @RefCell<HashMap<T, Name>>,
25 priv vect: @RefCell<~[T]>,
28 // when traits can extend traits, we should extend index<Name,T> to get []
29 impl<T:Eq + IterBytes + Hash + Freeze + Clone + 'static> Interner<T> {
30 pub fn new() -> Interner<T> {
32 map: @RefCell::new(HashMap::new()),
33 vect: @RefCell::new(~[]),
37 pub fn prefill(init: &[T]) -> Interner<T> {
38 let rv = Interner::new();
39 for v in init.iter() {
40 rv.intern((*v).clone());
45 pub fn intern(&self, val: T) -> Name {
46 let mut map = self.map.borrow_mut();
47 match map.get().find(&val) {
48 Some(&idx) => return idx,
52 let mut vect = self.vect.borrow_mut();
53 let new_idx = vect.get().len() as Name;
54 map.get().insert(val.clone(), new_idx);
59 pub fn gensym(&self, val: T) -> Name {
60 let mut vect = self.vect.borrow_mut();
61 let new_idx = vect.get().len() as Name;
62 // leave out of .map to avoid colliding
67 pub fn get(&self, idx: Name) -> T {
68 let vect = self.vect.borrow();
69 vect.get()[idx].clone()
72 pub fn len(&self) -> uint {
73 let vect = self.vect.borrow();
77 pub fn find_equiv<Q:Hash + IterBytes + Equiv<T>>(&self, val: &Q)
79 let map = self.map.borrow();
80 match map.get().find_equiv(val) {
87 #[deriving(Clone, Eq, IterBytes, Ord)]
89 priv string: Rc<~str>,
92 impl TotalEq for RcStr {
93 fn equals(&self, other: &RcStr) -> bool {
94 self.as_slice().equals(&other.as_slice())
98 impl TotalOrd for RcStr {
99 fn cmp(&self, other: &RcStr) -> Ordering {
100 self.as_slice().cmp(&other.as_slice())
106 fn as_slice<'a>(&'a self) -> &'a str {
107 let s: &'a str = *self.string.borrow();
112 fn into_owned(self) -> ~str {
113 self.string.borrow().to_owned()
118 pub fn new(string: &str) -> RcStr {
120 string: Rc::new(string.to_owned()),
125 // A StrInterner differs from Interner<String> in that it accepts
126 // references rather than @ ones, resulting in less allocation.
127 pub struct StrInterner {
128 priv map: @RefCell<HashMap<RcStr, Name>>,
129 priv vect: @RefCell<~[RcStr]>,
132 // when traits can extend traits, we should extend index<Name,T> to get []
134 pub fn new() -> StrInterner {
136 map: @RefCell::new(HashMap::new()),
137 vect: @RefCell::new(~[]),
141 pub fn prefill(init: &[&str]) -> StrInterner {
142 let rv = StrInterner::new();
143 for &v in init.iter() { rv.intern(v); }
147 pub fn intern(&self, val: &str) -> Name {
148 let mut map = self.map.borrow_mut();
149 match map.get().find_equiv(&val) {
150 Some(&idx) => return idx,
154 let new_idx = self.len() as Name;
155 let val = RcStr::new(val);
156 map.get().insert(val.clone(), new_idx);
157 let mut vect = self.vect.borrow_mut();
158 vect.get().push(val);
162 pub fn gensym(&self, val: &str) -> Name {
163 let new_idx = self.len() as Name;
164 // leave out of .map to avoid colliding
165 let mut vect = self.vect.borrow_mut();
166 vect.get().push(RcStr::new(val));
170 // I want these gensyms to share name pointers
171 // with existing entries. This would be automatic,
172 // except that the existing gensym creates its
173 // own managed ptr using to_managed. I think that
174 // adding this utility function is the most
175 // lightweight way to get what I want, though not
176 // necessarily the cleanest.
178 // create a gensym with the same name as an existing
180 pub fn gensym_copy(&self, idx : Name) -> Name {
181 let new_idx = self.len() as Name;
182 // leave out of map to avoid colliding
183 let mut vect = self.vect.borrow_mut();
184 let existing = vect.get()[idx].clone();
185 vect.get().push(existing);
189 pub fn get(&self, idx: Name) -> RcStr {
190 let vect = self.vect.borrow();
191 vect.get()[idx].clone()
194 /// Returns this string with lifetime tied to the interner. Since
195 /// strings may never be removed from the interner, this is safe.
196 pub fn get_ref<'a>(&'a self, idx: Name) -> &'a str {
197 let vect = self.vect.borrow();
198 let s: &str = vect.get()[idx].as_slice();
204 pub fn len(&self) -> uint {
205 let vect = self.vect.borrow();
209 pub fn find_equiv<Q:Hash + IterBytes + Equiv<RcStr>>(&self, val: &Q)
211 let map = self.map.borrow();
212 match map.get().find_equiv(val) {
225 let i : Interner<RcStr> = Interner::new();
230 fn interner_tests () {
231 let i : Interner<RcStr> = Interner::new();
232 // first one is zero:
233 assert_eq!(i.intern(RcStr::new("dog")), 0);
234 // re-use gets the same entry:
235 assert_eq!(i.intern(RcStr::new("dog")), 0);
236 // different string gets a different #:
237 assert_eq!(i.intern(RcStr::new("cat")), 1);
238 assert_eq!(i.intern(RcStr::new("cat")), 1);
239 // dog is still at zero
240 assert_eq!(i.intern(RcStr::new("dog")), 0);
242 assert_eq!(i.gensym(RcStr::new("zebra") ), 2);
243 // gensym of same string gets new number :
244 assert_eq!(i.gensym (RcStr::new("zebra") ), 3);
245 // gensym of *existing* string gets new number:
246 assert_eq!(i.gensym(RcStr::new("dog")), 4);
247 assert_eq!(i.get(0), RcStr::new("dog"));
248 assert_eq!(i.get(1), RcStr::new("cat"));
249 assert_eq!(i.get(2), RcStr::new("zebra"));
250 assert_eq!(i.get(3), RcStr::new("zebra"));
251 assert_eq!(i.get(4), RcStr::new("dog"));
256 let i : Interner<@~str> = Interner::prefill([
261 assert_eq!(i.get(0), RcStr::new("Alan"));
262 assert_eq!(i.get(1), RcStr::new("Bob"));
263 assert_eq!(i.get(2), RcStr::new("Carol"));
264 assert_eq!(i.intern(RcStr::new("Bob")), 1);
268 fn string_interner_tests() {
269 let i : StrInterner = StrInterner::new();
270 // first one is zero:
271 assert_eq!(i.intern("dog"), 0);
272 // re-use gets the same entry:
273 assert_eq!(i.intern ("dog"), 0);
274 // different string gets a different #:
275 assert_eq!(i.intern("cat"), 1);
276 assert_eq!(i.intern("cat"), 1);
277 // dog is still at zero
278 assert_eq!(i.intern("dog"), 0);
280 assert_eq!(i.gensym("zebra"), 2);
281 // gensym of same string gets new number :
282 assert_eq!(i.gensym("zebra"), 3);
283 // gensym of *existing* string gets new number:
284 assert_eq!(i.gensym("dog"), 4);
285 // gensym tests again with gensym_copy:
286 assert_eq!(i.gensym_copy(2), 5);
287 assert_eq!(i.get(5), RcStr::new("zebra"));
288 assert_eq!(i.gensym_copy(2), 6);
289 assert_eq!(i.get(6), RcStr::new("zebra"));
290 assert_eq!(i.get(0), RcStr::new("dog"));
291 assert_eq!(i.get(1), RcStr::new("cat"));
292 assert_eq!(i.get(2), RcStr::new("zebra"));
293 assert_eq!(i.get(3), RcStr::new("zebra"));
294 assert_eq!(i.get(4), RcStr::new("dog"));