]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/dict/comfix.awk
cc, ?[acl]: fix gethunk() and move common memory allocator code to cc/compat
[plan9front.git] / sys / src / cmd / dict / comfix.awk
1 # when raw index has a lot of entries like
2 # 1578324       problematico, a, ci, che
3 # apply this algorithm:
4 #  treat things after comma as suffixes
5 #  for each suffix:
6 #      if single letter, replace last letter
7 #      else search backwards for beginning of suffix
8 #      and if it leads to an old suffix of approximately
9 #      the same length, put replace that suffix
10 # This will still leave some commas to fix by hand
11 # Usage: awk -F'        ' -f comfix.awk rawindex > newrawindex
12
13 NF == 2 {
14                 i = index($2, ",")
15                 if(i == 0 || length($2) == 0)
16                         print $0
17                 else {
18                         n = split($2, a, /,[ ]*/)
19                         w = a[1]
20                         printf "%s\t%s\n", $1, w
21                         for(i = 2; i <= n; i++) {
22                                 suf = a[i]
23                                 m = matchsuflen(w, suf)
24                                 if(m) {
25                                         nw = substr(w, 1, length(w)-m) suf
26                                         printf "%s\t%s\n", $1, nw
27                                 } else
28                                         printf "%s\t%s\n", $1, w ", " suf
29                         }
30                 }
31         }
32 NF != 2 {
33         print $0
34         }
35
36 function matchsuflen(w, suf,            wlen,suflen,c,pat,k,d)
37 {
38         wlen = length(w)
39         suflen = length(suf)
40         if(suflen == 1)
41                 return 1
42         else {
43                 c = substr(suf, 1, 1)
44                 for (k = 1; k <= wlen ; k++)
45                         if(substr(w, wlen-k+1, 1) == c)
46                                 break
47                 if(k > wlen)
48                         return 0
49                 d = k-suflen
50                 if(d < 0)
51                         d = -d
52                 if(d > 3)
53                         return 0
54                 return k
55         }
56 }