7 /* read an annotated spelling list in the form
8 word <tab> affixcode [ , affixcode ] ...
9 print a reencoded version
13 typedef struct Dict Dict;
28 void readinput(int f);
29 long typecode(char *str);
30 int wcmp(void*, void*);
35 main(int argc, char *argv[])
39 Binit(&bout, 1, OWRITE);
48 fprint(2, "Cannot open %s\n", argv[1]);
55 fprint(2, "words = %ld; space = %ld; codes = %d\n",
56 nwords, nspace, ncodes);
57 qsort(words, nwords, sizeof(words[0]), wcmp);
62 wcmp(void *a, void *b)
65 return strcmp(((Dict*)a)->word, ((Dict*)b)->word);
72 char *code, *line, *bword;
76 Binit(&buf, f, OREAD);
77 while(line = Brdline(&buf, '\n')) {
78 line[Blinelen(&buf)-1] = 0;
84 while(*code && !isspace(*code))
88 memmove(space+nspace, bword, i);
89 words[nwords].word = space+nspace;
99 words[nwords].encode = typecode(code);
101 if(nwords >= sizeof(words)/sizeof(words[0])) {
102 fprint(2, "words array too small\n");
105 if(nspace >= sizeof(space)/sizeof(space[0])) {
106 fprint(2, "space array too small\n");
114 typedef struct Class Class;
162 { "nopref", NOPREF },
168 { "pc", PROP_COLLECT },
236 for(s=str; *s != 0 && *s != ','; s++)
238 for(p = codetab[*str-'a']; sp = p->codename; p++) {
240 for(n=s-str;; st++,sp++) {
254 fprint(2, "Unknown affix code \"%s\"\n", str);
257 for(i=0; i<ncodes; i++)
258 if(encodes[i] == code)
283 * spit out the encoded dictionary
284 * all numbers are encoded big-endian.
288 * long encodes[ncodes];
295 * 0x8000 flag for code word
296 * 0x7800 number of leading bytes shared with the previous word
297 * 0x07ff index into codes array for affixes
304 char *lastword, *thisword, *word;
307 for(i=0; i<ncodes; i++)
310 count = ncodes*4 + 2;
312 for(i=0; i<nwords; i++) {
313 word = words[i].word;
315 for(j=0; *thisword == *lastword; j++) {
317 fprint(2, "identical words: %s\n", word);
325 encode = words[i].encode;
326 c = (1<<15) | (j<<11) | encode;
329 for(thisword=word+j; c = *thisword; thisword++) {
335 fprint(2, "output bytes = %ld\n", count);