/// Returns the Levenshtein distance between `self` and `t`.
///
/// The distance is counted in characters rather than bytes: when either
/// operand is empty the result is the other operand's `char_len()`, and the
/// non-empty case iterates over `self` with `chars()`.
fn lev_distance(&self, t: &str) -> uint {
let me = self.as_slice();
- let slen = me.len();
- let tlen = t.len();
+ if me.is_empty() { return t.char_len(); }
+ if t.is_empty() { return me.char_len(); }
- if slen == 0 { return tlen; }
- if tlen == 0 { return slen; }
-
- let mut dcol = Vec::from_fn(tlen + 1, |x| x);
+ let mut dcol = Vec::from_fn(t.len() + 1, |x| x); // `t.len()` is the byte length, so there are at least `t.char_len() + 1` slots
+ let mut t_last = 0; // last `j` stored by the loop below; used to index the result
for (i, sc) in me.chars().enumerate() {
*dcol.get_mut(j + 1) = current;
} else {
*dcol.get_mut(j + 1) = cmp::min(current, next);
- *dcol.get_mut(j + 1) = cmp::min(dcol[j + 1],
- dcol[j]) + 1;
+ *dcol.get_mut(j + 1) = cmp::min(dcol[j + 1], dcol[j]) + 1;
}
current = next;
+ t_last = j;
}
}
- return dcol[tlen];
+ dcol[t_last + 1] // slot after the last recorded `j`, replacing the byte-length index `dcol[tlen]`
}
/// Returns an iterator over the string in Unicode Normalization Form D
assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}
+ #[test]
+ fn test_lev_distance() {
+ use std::char::{ from_u32, MAX };
+ // Byte-length agnosticity: a one-char string built from any valid `char` must be at distance 0 from itself.
+ for c in range(0u32, MAX as u32)
+ .filter_map(|i| from_u32(i)) // keep only the values `from_u32` accepts
+ .map(|i| String::from_char(1, i)) {
+ assert_eq!(c[].lev_distance(c[]), 0);
+ }
+
+ let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+ let b = "\nMary häd ä little lämb\n\nLittle lämb\n"; // `a` with the first 'ä' replaced by 'a'
+ let c = "Mary häd ä little lämb\n\nLittle lämb\n"; // `b` without its leading "\n"
+ assert_eq!(a.lev_distance(b), 1);
+ assert_eq!(b.lev_distance(a), 1);
+ assert_eq!(a.lev_distance(c), 2);
+ assert_eq!(c.lev_distance(a), 2);
+ assert_eq!(b.lev_distance(c), 1);
+ assert_eq!(c.lev_distance(b), 1);
+ }
+
#[test]
fn test_nfd_chars() {
macro_rules! t {