4 /// Find the Levenshtein distance between two strings
5 pub fn lev_distance(a: &str, b: &str) -> usize {
6 // cases which don't require further computation
8 return b.chars().count();
9 } else if b.is_empty() {
10 return a.chars().count();
13 let mut dcol: Vec<_> = (0..=b.len()).collect();
16 for (i, sc) in a.chars().enumerate() {
18 dcol[0] = current + 1;
20 for (j, tc) in b.chars().enumerate() {
21 let next = dcol[j + 1];
23 dcol[j + 1] = current;
25 dcol[j + 1] = cmp::min(current, next);
26 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
35 /// Find the best match for a given word in the given iterator
37 /// As a loose rule to avoid the obviously incorrect suggestions, it takes
38 /// an optional limit for the maximum allowable edit distance, which defaults
39 /// to one-third of the given word.
41 /// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
42 /// a lower(upper)case letters mismatch.
43 pub fn find_best_match_for_name<'a, T>(iter_names: T,
45 dist: Option<usize>) -> Option<Symbol>
46 where T: Iterator<Item = &'a Symbol> {
47 let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d);
49 let (case_insensitive_match, levenstein_match) = iter_names
51 let dist = lev_distance(lookup, &name.as_str());
58 // Here we are collecting the next structure:
59 // (case_insensitive_match, (levenstein_match, levenstein_distance))
60 .fold((None, None), |result, (candidate, dist)| {
62 if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
68 None => Some((candidate, dist)),
69 Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) })
74 if let Some(candidate) = case_insensitive_match {
75 Some(candidate) // exact case insensitive match has a higher priority
77 if let Some((candidate, _)) = levenstein_match { Some(candidate) } else { None }
82 fn test_lev_distance() {
83 use std::char::{from_u32, MAX};
84 // Test bytelength agnosticity
85 for c in (0..MAX as u32)
86 .filter_map(|i| from_u32(i))
87 .map(|i| i.to_string()) {
88 assert_eq!(lev_distance(&c[..], &c[..]), 0);
91 let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
92 let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
93 let c = "Mary häd ä little lämb\n\nLittle lämb\n";
94 assert_eq!(lev_distance(a, b), 1);
95 assert_eq!(lev_distance(b, a), 1);
96 assert_eq!(lev_distance(a, c), 2);
97 assert_eq!(lev_distance(c, a), 2);
98 assert_eq!(lev_distance(b, c), 1);
99 assert_eq!(lev_distance(c, b), 1);
103 fn test_find_best_match_for_name() {
106 let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")];
108 find_best_match_for_name(input.iter(), "aaaa", None),
109 Some(Symbol::intern("aaab"))
113 find_best_match_for_name(input.iter(), "1111111111", None),
117 let input = vec![Symbol::intern("aAAA")];
119 find_best_match_for_name(input.iter(), "AAAA", None),
120 Some(Symbol::intern("aAAA"))
123 let input = vec![Symbol::intern("AAAA")];
124 // Returns None because `lev_distance > max_dist / 3`
126 find_best_match_for_name(input.iter(), "aaaa", None),
130 let input = vec![Symbol::intern("AAAA")];
132 find_best_match_for_name(input.iter(), "aaaa", Some(4)),
133 Some(Symbol::intern("AAAA"))