9 * Assumed index file structure: lines of form
11 * First field is key, second is byte offset into dictionary.
12 * Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
14 typedef struct Addr Addr;
17 int n; /* number of offsets */
18 int cur; /* current position within doff array */
19 int maxn; /* actual current size of doff array */
20 ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
25 Biobuf *bin = &binbuf; /* user cmd input */
26 Biobuf *bout = &boutbuf; /* output */
27 Biobuf *bdict; /* dictionary */
28 Biobuf *bindex; /* index file */
29 long indextop; /* index offset at end of file */
30 int lastcmd; /* last executed command */
31 Addr *dot; /* "current" address */
32 Dict *dict; /* current dictionary */
39 int getpref(char*, Rune*);
43 int parseaddr(char*, char**);
45 int search(char*, int);
46 long seeknextline(Biobuf*, long);
47 void setdotnext(void);
48 void setdotprev(void);
53 Plen=300, /* max length of a search pattern */
54 Fieldlen=200, /* max length of an index field */
55 Aslots=10, /* initial number of slots in an address */
59 main(int argc, char **argv)
64 Binit(&binbuf, 0, OREAD);
65 Binit(&boutbuf, 1, OWRITE);
69 for(i=0; dicts[i].name; i++){
70 if(access(dicts[i].path, 0)>=0 && access(dicts[i].indexpath, 0)>=0){
80 for(i=0; dicts[i].name; i++)
81 if(strcmp(p, dicts[i].name)==0) {
104 err("no dictionaries present on this system");
118 line = malloc(strlen(p)+5);
119 sprint(line, "/%s/P\n", p);
121 bdict = Bopen(dict->path, OREAD);
123 err("can't open dictionary %s", dict->path);
126 bindex = Bopen(dict->indexpath, OREAD);
128 err("can't open index %s", dict->indexpath);
131 indextop = Bseek(bindex, 0L, 2);
133 dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
140 cmd = parsecmd(line);
147 line = Brdline(bin, '\n');
151 cmd = parsecmd(line);
167 Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
168 Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
169 for(i = 0; dicts[i].name; i++){
171 if(access(dicts[i].path, 0)<0 || access(dicts[i].indexpath, 0)<0){
175 Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
186 if(parseaddr(line, &e) >= 0)
194 if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
196 err("unknown command %c", cmd);
201 case 0: ans = 'H'; break;
202 case 'H': ans = 'p'; break;
203 default : ans = lastcmd; break;
205 else if(line[1] != '\n' && line[1] != 0)
206 err("extra stuff after command %c ignored", cmd);
225 if(debug && doall && cmd == 'a')
226 Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
231 Bprint(bout, "%d\t", cur+1);
232 linelen += 4 + (cur >= 10);
236 Bprint(bout, "#%lud\n", dot->doff[cur]);
242 (*dict->printentry)(e, cmd);
247 if(cmd == 'p' || cmd == 'r') {
260 * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
261 * Answer goes in dot.
262 * Return -1 if address starts, but get error.
263 * Return 0 if no address.
266 parseaddr(char *line, char **eptr)
273 if(*line == '/' || *line == '!') {
274 /* anchored regular expression match; '!' means no folding */
277 e = strpbrk(line+1, "/\n");
280 e = strpbrk(line+1, "!\n");
284 err("pattern too big");
288 memcpy(pat+1, line+1, plen);
295 if(!search(pat, delim == '/')) {
296 err("pattern not found");
299 } else if(*line == '#') {
300 /* absolute byte offset into dictionary */
304 v = strtoul(line, &e, 10);
309 } else if(isdigit(*line)) {
310 v = strtoul(line, &e, 10);
312 if(v < 1 || v > dot->n)
313 err(".%d not in range [1,%d], ignored",
317 } else if(*line == '.') {
323 while(*line == '+' || *line == '-') {
335 * Index file is sorted by folded field1.
336 * Method: find pre, a folded prefix of r.e. pat,
337 * and then low = offset to beginning of
338 * line in index file where first match of prefix occurs.
339 * Then go through index until prefix no longer matches,
340 * adding each line that matches real pattern to dot.
341 * Finally, sort dot offsets (uniquing).
342 * We know pat len < Plen, and that it is surrounded by ^..$
345 search(char *pat, int dofold)
347 int needre, prelen, match, n;
352 Rune entry[Fieldlen];
355 prelen = getpref(pat+1, pre);
356 if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
375 Bseek(bindex, ioff, 0);
382 match = rregexec(re, entry, 0, 0);
384 match = (acomp(lit, entry) == 0);
389 if(dot->n >= dot->maxn) {
392 sizeof(Addr)+(n-1)*sizeof(long));
394 err("out of memory");
399 dot->doff[dot->n++] = v;
404 n = acomp(pre, entry);
405 if(n < -1 || (!needre && n < 0))
408 /* get to next index entry */
419 * Return offset in index file of first line whose folded
420 * first field has pre as a prefix. -1 if none found.
426 Rune entry[Fieldlen];
433 fprint(2, "locate looking for prefix %S\n", pre);
436 * Loop invariant: foldkey(bot) < pre <= foldkey(top)
437 * and bot < top, and bot,top point at beginning of lines
440 mid = seeknextline(bindex, mid);
442 fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
443 bot, (top+bot) / 2, mid, top);
444 if(mid == top || !getfield(entry))
447 fprint(2, "key=%S\n", entry);
449 * here mid is strictly between bot and top
452 if(acomp(pre, entry) <= 0)
458 * bot < top, but they don't necessarily point at successive lines
459 * Use linear search from bot to find first line that pre is a
462 while((bot = seeknextline(bindex, bot)) <= top) {
466 fprint(2, "key=%S\n", entry);
468 switch(acomp(pre, entry)) {
484 * Get prefix of non re-metacharacters, runified, into pre,
488 getpref(char *pat, Rune *pre)
495 n = chartorune(pre, p);
498 case L'.': case L'*': case L'+': case L'?':
499 case L'[': case L']': case L'(': case ')':
500 case L'|': case L'^': case L'$':
505 p += chartorune(++pre, p);
517 seeknextline(Biobuf *b, long off)
524 } while(c>=0 && c!='\n');
529 * Get next field out of index file (either tab- or nl- terminated)
530 * Answer in *rp, assumed to be Fieldlen long.
531 * Return 0 if read error first.
539 for(n=Fieldlen; n-- > 0; ) {
540 if ((c = Bgetrune(bindex)) < 0)
542 if(c == '\t' || c == '\n') {
548 err("word too long");
553 * A compare longs function suitable for qsort
556 longcmp(void *av, void *bv)
582 qsort(a->doff, a->n, sizeof(long), longcmp);
584 /* remove duplicates */
585 for(i=0, j=0; j < a->n; j++) {
587 if(i > 0 && v == a->doff[i-1])
599 static int anslen = 0;
602 e = (*dict->nextoff)(b+1);
605 err("couldn't seek to entry");
611 ans.start = realloc(ans.start, n+1);
613 err("out of memory");
619 n = Bread(bdict, ans.start, n);
620 ans.end = ans.start + n;
631 b = (*dict->nextoff)(dot->doff[dot->cur]+1);
633 err("couldn't find a next entry");
647 if(dot->cur < 0 || dot->cur >= dot->n)
650 here = dot->doff[dot->cur];
657 p = (*dict->nextoff)(p+1);
659 return; /* shouldn't happen */
665 if(here - tryback < 0) {
666 err("can't find a previous entry");