7 /* fig leaves for possibly signed char quantities */
8 #define ISUPPER(c) isupper((c)&0xff) /* mask to the low 8 bits so a negative char never reaches isupper */
9 #define ISLOWER(c) islower((c)&0xff) /* same masking, for islower */
10 #define ISALPHA(c) isalpha((c)&0xff) /* same masking, for isalpha */
11 #define ISDIGIT(c) isdigit((c)&0xff) /* same masking, for isdigit */
12 #define ISVOWEL(c) voweltab[(c)&0xff] /* table lookup; the mask keeps the index non-negative */
13 #define Tolower(c) (ISUPPER(c)? (c)-'A'+'a': (c)) /* lowercase by character offset; note c is evaluated more than once */
14 #define pair(a,b) (((a)<<8) | (b)) /* combine two characters into one integer, a shifted above b; used as a switch selector */
19 #define Set(h, f) ((long)(h) & (f)) /* nonzero iff h has at least one of the bits in f */
21 Bits nop(char*, char*, char*, int, int); /* this and the routines below share the Suftab p1/p2 signature: (ep, d, a, lev, flag) */
22 Bits strip(char*, char*, char*, int, int);
23 Bits ize(char*, char*, char*, int, int);
24 Bits i_to_y(char*, char*, char*, int, int);
25 Bits ily(char*, char*, char*, int, int);
26 Bits subst(char*, char*, char*, int, int);
27 Bits CCe(char*, char*, char*, int, int);
28 Bits tion(char*, char*, char*, int, int);
29 Bits an(char*, char*, char*, int, int);
30 Bits s(char*, char*, char*, int, int);
31 Bits es(char*, char*, char*, int, int);
32 Bits bility(char*, char*, char*, int, int);
33 Bits y_to_e(char*, char*, char*, int, int);
34 Bits VCe(char*, char*, char*, int, int);
36 Bits trypref(char*, char*, int, int); /* (ep, a, lev, flag) */
37 Bits tryword(char*, char*, int, int); /* (bp, ep, lev, flag) */
38 Bits trysuff(char*, int, int); /* (ep, lev, flag) */
39 Bits dict(char*, char*); /* (bp, ep) */
46 int inun(char*, Bits); /* (bp, h) */
50 typedef struct Ptab Ptab;
57 typedef struct Suftab Suftab;
61 Bits (*p1)(char*, char*, char*, int, int);
67 Bits (*p2)(char*, char*, char*, int, int);
74 {"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
80 {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
81 {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
82 {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
83 {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
84 {"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
85 {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
86 {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
87 {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
88 {"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
93 {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
94 {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
100 * V_affix for comment ->commence->commentment??
102 {"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
103 {"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
104 {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
105 {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
106 {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
107 {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
108 {"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
113 {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
114 {"gnikam",strip,6,"","+making",NOUN,NOUN},
115 {"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
116 {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
121 {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
122 {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
123 {"latnem",strip,2,"","+al",N_AFFIX,ADJ},
124 {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
125 {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
130 /* congregational + ism; the append message carries a leading '+' like every other row */
131 {"msi",CCe,3,"-e+ism","+ism",N_AFFIX|ADJ,NOUN},
132 {"margo",subst,-1,"-ph+m","",NOUN,NOUN},
137 {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
138 {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
139 {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
140 {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
141 {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
142 {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
143 {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
144 {"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
145 {"nem",strip,3,"","+men",MAN,PROP_COLLECT}, /* plural of the "nam" row: message names the suffix actually stripped, as "nemow" does with "+women" */
146 {"nosrep",strip,6,"","+person",MAN,PROP_COLLECT},
151 {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
156 {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
157 {"reyhparg",nop,0,"","",0,NOUN},
158 {"reyl",nop,0,"","",0,NOUN},
159 {"rekam",strip,5,"","+maker",NOUN,NOUN},
160 {"repeek",strip,6,"","+keeper",NOUN,NOUN},
161 {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"},
162 {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
163 {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
164 {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
169 {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
170 {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
171 {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"},
172 {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
173 {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
178 {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
179 {"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" },
180 {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
181 {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
186 {"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
187 {"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
188 {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
189 {"ytisuo",nop,0,"","",NOUN},
190 {"ytilb",nop,0,"","",0,NOUN},
191 {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
192 {"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
193 {"ylc",nop,0,"","",0},
194 {"ylelb",nop,0,"","",0},
195 {"ylelp",nop,0,"","",0},
196 {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
197 {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
198 {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
368 "under", 0, /*must precede un*/
425 enum { NONE, SUFF, PREF} type;
438 char affix[DSIZ*10]; /* 10 is longest affix message; tryword strcat()s each deriv[j].mesg into this buffer */
442 char space[300000]; /* must be as large as "words"+"space" in pcode run; readdict reads the dictionary file into this buffer */
443 Bits encode[2048]; /* must be as long as "codes" in pcode run; filled by readdict and looked up as encode[f] by affix code */
446 char* spacep[128*128+1]; /* pointer to words starting with 'xx' */
450 char* codefile = "/sys/lib/amspell"; /* NOTE(review): presumably the default dictionary path -- confirm in main */
451 char* brfile = "/sys/lib/brspell"; /* NOTE(review): presumably the British dictionary chosen by -b -- confirm in main */
452 char* Usage = "usage";
455 main(int argc, char *argv[])
463 Binit(&bin, 0, OREAD);
464 Binit(&bout, 1, OWRITE);
465 for(i=0; c = "aeiouyAEIOUY"[i]; i++)
468 if(argv[1][0] != '-')
470 for(i=1; c = argv[1][i]; i++)
473 fprint(2, "usage: spell [-bcCvx] [-f file]\n");
486 case 'C': /* for "correct" */
488 case 'c': /* for ocr */
502 fprint(2, "spell: -f requires another argument\n");
517 fprint(2, "usage: spell [-bcCvx] [-f file]\n");
525 original = Brdline(&bin, '\n');
528 original[Blinelen(&bin)-1] = 0;
533 while(*original != ':')
536 while(*++original != ':')
541 for(ep=word,dp=original; j = *dp; ep++,dp++) {
544 if(ep >= word+sizeof(word)-1)
550 if(ISDIGIT(word[0]) && ordinal())
554 if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
555 for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
558 for(;;) { /* at most twice */
559 if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
561 if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
563 if(!ISUPPER(word[0]))
572 word[0] = Tolower(word[0]);
576 if(!h || Set(h,STOP))
581 print("%c",'0' + (suffcount>0) +
582 (prefcount>4? 8: 2*prefcount));
583 } else if(!h || Set(h,STOP)) {
585 Bprint(&bout, "%s:%s\n", acmeid, original);
587 Bprint(&bout, "%s\n", original);
588 } else if(affix[0] != 0 && affix[0] != '.')
589 print("%s\t%s\n", affix, original);
594 /* strip exactly one suffix and do
595 * indicated routine(s), which may recursively
599 trysuff(char* ep, int lev, int flag)
604 int initchar = ep[-1];
609 deriv[lev] = emptyderiv;
610 deriv[lev-1] = emptyderiv;
612 if(!ISLOWER(initchar))
614 for(t=suftab[initchar-'a']; sp=t->suf; t++) {
619 for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);)
623 if(!(t->affixable & flag))
625 h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
628 deriv[lev] = emptyderiv;
629 deriv[lev+1] = emptyderiv;
631 h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
640 nop(char* ep, char* d, char* a, int lev, int flag)
642 USED(ep, d, a, lev, flag);
647 cstrip(char* ep, char* d, char* a, int lev, int flag)
651 if(ISVOWEL(temp) && ISVOWEL(ep[-1])) {
652 switch(pair(ep[-1],ep[0])) {
664 if(temp==ep[-1]&&temp==ep[-2])
666 return strip(ep,d,a,lev,flag);
670 strip(char* ep, char* d, char* a, int lev, int flag)
672 Bits h = trypref(ep, a, lev, flag);
675 if(Set(h,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2]))
679 if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1]==ep[-2]) {
680 h = trypref(ep-1,a,lev,flag|MONO);
684 return trysuff(ep,lev,flag);
688 s(char* ep, char* d, char* a, int lev, int flag)
695 if(ISVOWEL(ep[-2])||ISUPPER(*word))
696 break; /*says Kennedys*/
709 return strip(ep,d,a,lev,flag);
713 an(char* ep, char* d, char* a, int lev, int flag)
716 if(!ISUPPER(*word)) /*must be proper name*/
718 return trypref(ep,a,lev,flag);
722 ize(char* ep, char* d, char* a, int lev, int flag)
729 h = strip(ep,"",d,lev,flag);
735 y_to_e(char* ep, char* d, char* a, int lev, int flag)
749 h = strip(ep,"",d,lev,flag);
755 ily(char* ep, char* d, char* a, int lev, int flag)
760 if(temp==ep[-1]&&temp==ep[-2]) /* sillly */
762 if(*--cp=='y' && !ISVOWEL(*--cp)) /* happyly */
764 if(ISVOWEL(*--cp)) /* shyness */
767 return i_to_y(ep,d,a,lev,flag);
768 return cstrip(ep,d,a,lev,flag);
772 bility(char* ep, char* d, char* a, int lev, int flag)
775 return y_to_e(ep,d,a,lev,flag);
779 i_to_y(char* ep, char* d, char* a, int lev, int flag)
786 if((temp=ep[-1])=='i' && !ISVOWEL(ep[-2])) {
790 h = cstrip(ep,"",a,lev,flag);
796 es(char* ep, char* d, char* a, int lev, int flag)
804 return i_to_y(ep,d,a,lev,flag);
816 return strip(ep,d,a,lev,flag);
821 subst(char* ep, char* d, char* a, int lev, int flag)
827 if(skipv(skipv(ep-1)) < word)
829 for(t=d; *t!='+'; t++)
831 for(u=ep; *--t!='-';)
833 h = strip(ep,"",d,lev,flag);
842 tion(char* ep, char* d, char* a, int lev, int flag)
846 return trypref(ep,a,lev,flag);
852 return y_to_e(ep,d,a,lev,flag);
857 * possible consonant-consonant-e ending
860 CCe(char* ep, char* d, char* a, int lev, int flag)
874 return y_to_e(ep,d,a,lev,flag);
879 if(*ep == 'a') /* prevent -able for -eable */
889 if(h = y_to_e(ep,d,a,lev,flag))
891 if(!(ep[-2]=='n' && ep[-1]=='g'))
894 return VCe(ep,d,a,lev,flag);
898 * possible consonant-vowel-consonant-e ending
901 VCe(char* ep, char* d, char* a, int lev, int flag)
909 if(!ISVOWEL(c) && ISVOWEL(ep[-2])) {
912 h = trypref(ep,d,lev,flag);
914 h = trysuff(ep,lev,flag);
920 return cstrip(ep,d,a,lev,flag);
924 lookuppref(uchar** wp, char* ep)
928 unsigned int initchar = Tolower(**wp);
930 if(!ISALPHA(initchar))
932 for(sp=preftab[initchar-'a'];sp->s;sp++) {
934 for(cp= (uchar*)sp->s;*cp; )
937 for(cp=bp;cp<(uchar*)ep;cp++)
947 /* while word is not in dictionary try stripping
948 * prefixes. Fail if no more prefixes.
951 trypref(char* ep, char* a, int lev, int flag)
961 deriv[lev].type = *a=='.'? NONE: SUFF;
963 if(h = tryword(word,ep,lev,flag)) {
964 if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
971 deriv[lev+1].mesg = pp;
972 deriv[lev+1].type = 0;
974 while(tp=lookuppref((uchar**)&bp,ep)) {
977 while(pp<space+sizeof(space) && (*pp = *cp++))
979 deriv[lev+1].type += PREF;
980 h = tryword(bp,ep,lev+1,flag);
982 ((tp->flag&IN) && inun(bp-2,h)==0)) {
986 if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
991 deriv[lev+1] = emptyderiv;
992 deriv[lev+2] = emptyderiv;
998 tryword(char* bp, char* ep, int lev, int flag)
1008 deriv[++lev].mesg = duple;
1009 deriv[lev].type = SUFF;
1016 if(vflag==0 || h==0)
1019 * when derivations are wanted, collect them
1023 prefcount = suffcount = 0;
1025 if(j<DSIZ && deriv[j].type) {
1026 strcat(affix, deriv[j].mesg);
1027 if(deriv[j].type == SUFF)
1029 else if(deriv[j].type != NONE)
1030 prefcount = deriv[j].type/PREF;
1037 inun(char* bp, Bits h)
1040 return Set(h, IN) == 0;
1046 return bp[1] == 'r';
1049 return bp[1] == 'm';
1051 return bp[1] == 'n';
1057 if(s >= word && ISVOWEL(*s))
1059 while(s >= word && !ISVOWEL(*s))
1065 * crummy way to Britishise
1074 for(p = suftab[i]; p->suf; p++) {
1075 p->suf = ztos(p->suf);
1076 p->d1 = ztos(p->d1);
1077 p->a1 = ztos(p->a1);
1100 dict(char* bp, char* ep)
1102 char *cp, *cp1, *w, *wp, *we;
1120 fprint(2, "=%.*s\n", utfnlen(w, n), w);
1124 * find the beginning of some word in the middle
1126 cp = bp + (ep-bp)/2;
1128 while(cp > bp && !(*cp & 0x80))
1130 while(cp > bp && (cp[-1] & 0x80))
1133 wp = w + 2; /* skip two letters */
1134 cp1 = cp + 2; /* skip affix code */
1153 while(!(*cp1 & 0x80))
1162 f = ((cp[0] & 0x7) << 8) |
1165 fprint(2, "=%.*s ", utfnlen(w, n), w);
1166 typeprint(encode[f]);
1178 if(h & PROP_COLLECT)
1181 if((h & VERB) == VERB)
1184 if((h & VERB) == V_IRREG)
1193 if((h & COMP) == ACTOR)
1236 fprint(2, ",%s", s);
1240 * is the word one of the following
1245 * called knowing word[0] is a digit
1256 if(ISUPPER(cp[0]) && ISUPPER(cp[1])) {
1257 sp[0] = Tolower(cp[0]);
1258 sp[1] = Tolower(cp[1]);
1260 return 0 == strncmp(sp,
1261 cp[-2]=='1'? "th": /* out of bounds if 1 digit */
1262 *--cp=='1'? "st": /* harmless */
1269 * read in the dictionary.
1273 * long encode[nencode];
1277 * the encodings are a table all different
1279 * the dictionary proper has 2 bytes
1280 * that demark and then the rest of the
1281 * word. the 2 bytes have the following
1283 * 0x78 0x00 count of prefix bytes
1284 * common with prev word
1285 * 0x07 0xff affix code
1287 * all ints are big-endian in the file.
1290 readdict(char *file)
1292 char *s, *is, *lasts, *ls;
1300 fprint(2, "cannot open %s\n", file);
1303 if(read(f, space, 2) != 2)
1305 nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
1306 if(read(f, space, 4*nencode) != 4*nencode)
1309 for(i=0; i<nencode; i++) {
1310 l = (long)(s[0] & 0xff) << 24;
1311 l |= (s[1] & 0xff) << 16;
1312 l |= (s[2] & 0xff) << 8;
1314 encode[i] = (Bits)l;
1317 l = read(f, space, sizeof(space));
1318 if(l == sizeof(space))
1320 is = space + (sizeof(space) - l);
1321 memmove(is, space, l);
1335 *s = 0x80; /* fence */
1340 *s++ = *is++ & 0xff;
1342 i = (*is++ & 0xff)*128;
1345 i = i/128*128 + (*is++ & 0xff);
1347 fprint(2, "the dict isnt sorted or \n");
1348 fprint(2, "memmove didn't work\n");
1359 if(is >= space+sizeof(space)) {
1372 fprint(2, "trouble reading %s\n", file);
1375 fprint(2, "not enough space for dictionary\n");