9 * Assumed index file structure: lines of form
11 * First field is key, second is byte offset into dictionary.
12 * Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
14 typedef struct Addr Addr;
17 int n; /* number of offsets */
18 int cur; /* current position within doff array */
19 int maxn; /* actual current size of doff array */
20 ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
25 Biobuf *bin = &binbuf; /* user cmd input */
26 Biobuf *bout = &boutbuf; /* output */
27 Biobuf *bdict; /* dictionary */
28 Biobuf *bindex; /* index file */
29 long indextop; /* index offset at end of file */
30 int lastcmd; /* last executed command */
31 Addr *dot; /* "current" address */
32 Dict *dict; /* current dictionary */
39 int getpref(char*, Rune*);
43 int parseaddr(char*, char**);
45 int search(char*, int);
46 long seeknextline(Biobuf*, long);
47 void setdotnext(void);
48 void setdotprev(void);
53 Plen=300, /* max length of a search pattern */
54 Fieldlen=200, /* max length of an index field */
55 Aslots=10, /* initial number of slots in an address */
59 main(int argc, char **argv)
64 Binit(&binbuf, 0, OREAD);
65 Binit(&boutbuf, 1, OWRITE);
69 for(i=0; dicts[i].name; i++){
70 if(access(dicts[i].path, 0)>=0 && access(dicts[i].indexpath, 0)>=0){
79 for(i=0; dicts[i].name; i++)
80 if(strcmp(p, dicts[i].name)==0) {
88 line = EARGF(usage());
100 err("no dictionaries present on this system");
114 line = malloc(strlen(p)+5);
115 sprint(line, "/%s/P\n", p);
117 bdict = Bopen(dict->path, OREAD);
119 err("can't open dictionary %s", dict->path);
122 bindex = Bopen(dict->indexpath, OREAD);
124 err("can't open index %s", dict->indexpath);
127 indextop = Bseek(bindex, 0L, 2);
129 dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
136 cmd = parsecmd(line);
143 line = Brdline(bin, '\n');
147 cmd = parsecmd(line);
163 Bprint(bout, "usage: %s [-k] [-d dict] [-c cmd] [pattern]\n", argv0);
164 Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
165 for(i = 0; dicts[i].name; i++){
167 if(access(dicts[i].path, 0)<0 || access(dicts[i].indexpath, 0)<0){
171 Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
182 if(parseaddr(line, &e) >= 0)
190 if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
192 err("unknown command %c", cmd);
197 case 0: ans = 'H'; break;
198 case 'H': ans = 'p'; break;
199 default : ans = lastcmd; break;
201 else if(line[1] != '\n' && line[1] != 0)
202 err("extra stuff after command %c ignored", cmd);
221 if(debug && doall && cmd == 'a')
222 Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
227 Bprint(bout, "%d\t", cur+1);
228 linelen += 4 + (cur >= 10);
232 Bprint(bout, "#%lud\n", dot->doff[cur]);
238 (*dict->printentry)(e, cmd);
243 if(cmd == 'p' || cmd == 'r') {
256 * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
257 * Answer goes in dot.
258 * Return -1 if address starts, but get error.
259 * Return 0 if no address.
262 parseaddr(char *line, char **eptr)
269 if(*line == '/' || *line == '!') {
270 /* anchored regular expression match; '!' means no folding */
273 e = strpbrk(line+1, "/\n");
276 e = strpbrk(line+1, "!\n");
280 err("pattern too big");
284 memcpy(pat+1, line+1, plen);
291 if(!search(pat, delim == '/')) {
292 err("pattern not found");
295 } else if(*line == '#') {
296 /* absolute byte offset into dictionary */
300 v = strtoul(line, &e, 10);
305 } else if(isdigit(*line)) {
306 v = strtoul(line, &e, 10);
308 if(v < 1 || v > dot->n)
309 err(".%d not in range [1,%d], ignored",
313 } else if(*line == '.') {
319 while(*line == '+' || *line == '-') {
331 * Index file is sorted by folded field1.
332 * Method: find pre, a folded prefix of r.e. pat,
333 * and then low = offset to beginning of
334 * line in index file where first match of prefix occurs.
335 * Then go through index until prefix no longer matches,
336 * adding each line that matches real pattern to dot.
337 * Finally, sort dot offsets (uniquing).
338 * We know pat len < Plen, and that it is surrounded by ^..$
341 search(char *pat, int dofold)
343 int needre, prelen, match, n;
348 Rune entry[Fieldlen];
351 prelen = getpref(pat+1, pre);
352 if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
353 runestrcpy(lit, pre);
371 Bseek(bindex, ioff, 0);
378 match = rregexec(re, entry, 0, 0);
380 match = (acomp(lit, entry) == 0);
385 if(dot->n >= dot->maxn) {
388 sizeof(Addr)+(n-1)*sizeof(long));
390 sysfatal("realloc: %r");
393 dot->doff[dot->n++] = v;
398 n = acomp(pre, entry);
399 if(n < -1 || (!needre && n < 0))
402 /* get to next index entry */
413 * Return offset in index file of first line whose folded
414 * first field has pre as a prefix. -1 if none found.
420 Rune entry[Fieldlen];
427 fprint(2, "locate looking for prefix %S\n", pre);
430 * Loop invariant: foldkey(bot) < pre <= foldkey(top)
431 * and bot < top, and bot,top point at beginning of lines
434 mid = seeknextline(bindex, mid);
436 fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
437 bot, (top+bot) / 2, mid, top);
438 if(mid == top || !getfield(entry))
441 fprint(2, "key=%S\n", entry);
443 * here mid is strictly between bot and top
446 if(acomp(pre, entry) <= 0)
452 * bot < top, but they don't necessarily point at successive lines
453 * Use linear search from bot to find first line that pre is a
456 while((bot = seeknextline(bindex, bot)) <= top) {
460 fprint(2, "key=%S\n", entry);
462 switch(acomp(pre, entry)) {
478 * Get prefix of non re-metacharacters, runified, into pre,
482 getpref(char *pat, Rune *pre)
489 n = chartorune(pre, p);
492 case L'.': case L'*': case L'+': case L'?':
493 case L'[': case L']': case L'(': case ')':
494 case L'|': case L'^': case L'$':
499 p += chartorune(++pre, p);
511 seeknextline(Biobuf *b, long off)
518 } while(c>=0 && c!='\n');
523 * Get next field out of index file (either tab- or nl- terminated)
524 * Answer in *rp, assumed to be Fieldlen long.
525 * Return 0 if read error first.
533 for(n=Fieldlen; n-- > 0; ) {
534 if ((c = Bgetrune(bindex)) < 0)
536 if(c == '\t' || c == '\n') {
542 err("word too long");
547 * A compare longs function suitable for qsort
550 longcmp(void *av, void *bv)
576 qsort(a->doff, a->n, sizeof(long), longcmp);
578 /* remove duplicates */
579 for(i=0, j=0; j < a->n; j++) {
581 if(i > 0 && v == a->doff[i-1])
593 static int anslen = 0;
596 e = (*dict->nextoff)(b+1);
599 err("couldn't seek to entry");
605 ans.start = realloc(ans.start, n+1);
607 sysfatal("realloc: %r");
611 n = Bread(bdict, ans.start, n);
612 ans.end = ans.start + n;
623 b = (*dict->nextoff)(dot->doff[dot->cur]+1);
625 err("couldn't find a next entry");
639 if(dot->cur < 0 || dot->cur >= dot->n)
642 here = dot->doff[dot->cur];
649 p = (*dict->nextoff)(p+1);
651 return; /* shouldn't happen */
657 if(here - tryback < 0) {
658 err("can't find a previous entry");