13 typedef struct Table Table;	/* lets the struct tag Table be written as the plain type name Table */
21 typedef struct Word Word;	/* lets the struct tag Word be written as the plain type name Word */
24 Stringtab *s; /* from hmsg */
25 int count[MAXTAB]; /* counts from each table */
26 double p[MAXTAB]; /* probabilities from each table */
27 double mp; /* max probability */
28 int mi; /* w.p[w.mi] = w.mp */
43 fprint(2, "usage: bayes [-D] [-m maxword] boxhash ... ~ msghash ...\n");
54 sysfatal("out of memory");
63 for(i=nbest-1; i>=0; i--)
64 if(w->mp < best[i].mp)
73 memmove(&best[i+1], &best[i], (nbest-i)*sizeof(best[0]));
84 if((b = Bopenlock(s, OREAD)) == nil)
85 sysfatal("open %s: %r", s);
87 h = emalloc(sizeof(Hash));
94 main(int argc, char **argv)
96 int i, j, a, mi, oi, tot, keywords;
97 double totp, p, xp[MAXTAB];
113 mbest = atoi(EARGF(usage()));
115 sysfatal("cannot keep more than %d words", MAXBEST);
121 for(i=0; i<argc; i++)
122 if(strcmp(argv[i], "~") == 0)
126 sysfatal("cannot handle more than %d tables", MAXTAB);
131 for(i=0; i<argc; i++){
132 if(strcmp(argv[i], "~") == 0)
134 tab[ntab].file = argv[i];
135 tab[ntab].hash = hread(argv[i]);
136 s = findstab(tab[ntab].hash, "*nmsg*", 6, 1);
137 if(s == nil || s->count == 0)
140 tab[ntab].nmsg = s->count;
144 Binit(&bout, 1, OWRITE);
147 for(a=i; a<argc; a++){
148 hmsg = hread(argv[a]);
150 for(s=hmsg->all; s; s=s->link){
154 for(i=0; i<ntab; i++){
155 t = findstab(tab[i].hash, s->str, s->n, 0);
159 w.count[i] = t->count;
161 p = w.count[i]/(double)tab[i].nmsg;
168 if(tot < 5){ /* word does not appear enough; give to box 0 */
170 for(i=1; i<ntab; i++)
179 for(i=0; i<ntab; i++){
196 for(i=0; i<ntab; i++){
198 for(j=0; j<nbest; j++)
203 for(i=0; i<ntab; i++)
206 for(i=1; i<ntab; i++)
210 Bprint(&bout, "%s: ", argv[a]);
211 Bprint(&bout, "%s %f", tab[mi].file, xp[mi]);
213 for(i=0; i<nbest; i++){
215 Bwrite(&bout, best[i].s->str, best[i].s->n);
216 Bprint(&bout, " %f", best[i].p[mi]);
222 for(i=0; i<nbest; i++){
223 Bwrite(&bout, best[i].s->str, best[i].s->n);
224 Bprint(&bout, " %f", best[i].p[mi]);
225 if(best[i].p[mi] < best[i].mp)
226 Bprint(&bout, " (%f %s)", best[i].mp, tab[best[i].mi].file);