]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/dict/utils.c
rsa: rename getkey() to getrsakey(), document rsa2csr in rsa(8)
[plan9front.git] / sys / src / cmd / dict / utils.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5
/*
 * Table of the dictionaries this program knows about.
 * Reading the initializers, each entry gives, in order: a short
 * name, a descriptive title, the path of the data file, the path of
 * the index file, and three format-specific routines (next-offset,
 * print-entry, print-key).  The Dict type is not defined in this
 * file; presumably it comes from dict.h.
 * Several entries share one data file and differ only in the index
 * file they name.  The final all-zero entry presumably marks the
 * end of the table for code that scans it.
 */
6 Dict dicts[] = {
7         {"oed",         "Oxford English Dictionary, 2nd Ed.",
8          "/lib/dict/oed2",      "/lib/dict/oed2index",
9          oednextoff,    oedprintentry,          oedprintkey},
10         {"ahd",         "American Heritage Dictionary, 2nd College Ed.",
11          "/lib/ahd/DICT.DB",    "/lib/ahd/index",
12          ahdnextoff,    ahdprintentry,          ahdprintkey},
13         {"pgw",         "Project Gutenberg Webster Dictionary",
14          "/lib/dict/pgw",       "/lib/dict/pgwindex",
15          pgwnextoff,    pgwprintentry,          pgwprintkey},
16         {"thesaurus",   "Collins Thesaurus",
17          "/lib/dict/thesaurus", "/lib/dict/thesindex",
18          thesnextoff,   thesprintentry, thesprintkey},
19         {"roget",               "Project Gutenberg Roget's Thesaurus",
20          "/lib/dict/roget", "/lib/dict/rogetindex",
21          rogetnextoff,  rogetprintentry,        rogetprintkey},
22
23         {"ce",          "Gendai Chinese->English",
24          "/lib/dict/world/sansdata/sandic24.dat",
25          "/lib/dict/world/sansdata/ceindex",
26          worldnextoff,  worldprintentry,        worldprintkey},
27         {"ceh",         "Gendai Chinese->English (Hanzi index)",
28          "/lib/dict/world/sansdata/sandic24.dat",
29          "/lib/dict/world/sansdata/cehindex",
30          worldnextoff,  worldprintentry,        worldprintkey},
31         {"ec",          "Gendai English->Chinese",
32          "/lib/dict/world/sansdata/sandic24.dat",
33          "/lib/dict/world/sansdata/ecindex",
34          worldnextoff,  worldprintentry,        worldprintkey},
35
36         {"dae",         "Gyldendal Danish->English",
37          "/lib/dict/world/gylddata/sandic30.dat",
38          "/lib/dict/world/gylddata/daeindex",
39          worldnextoff,  worldprintentry,        worldprintkey},
40         {"eda",         "Gyldendal English->Danish",
41          "/lib/dict/world/gylddata/sandic29.dat",
42          "/lib/dict/world/gylddata/edaindex",
43          worldnextoff,  worldprintentry,        worldprintkey},
44
45         {"due",         "Wolters-Noordhoff Dutch->English",
46          "/lib/dict/world/woltdata/sandic07.dat",
47          "/lib/dict/world/woltdata/deindex",
48          worldnextoff,  worldprintentry,        worldprintkey},
49         {"edu",         "Wolters-Noordhoff English->Dutch",
50          "/lib/dict/world/woltdata/sandic06.dat",
51          "/lib/dict/world/woltdata/edindex",
52          worldnextoff,  worldprintentry,        worldprintkey},
53
54         {"fie",         "WSOY Finnish->English",
55          "/lib/dict/world/werndata/sandic32.dat",
56          "/lib/dict/world/werndata/fieindex",
57          worldnextoff,  worldprintentry,        worldprintkey},
58         {"efi",         "WSOY English->Finnish",
59          "/lib/dict/world/werndata/sandic31.dat",
60          "/lib/dict/world/werndata/efiindex",
61          worldnextoff,  worldprintentry,        worldprintkey},
62
63         {"fe",          "Collins French->English",
64          "/lib/dict/fe",        "/lib/dict/feindex",
65          pcollnextoff,  pcollprintentry,        pcollprintkey},
66         {"ef",          "Collins English->French",
67          "/lib/dict/ef",        "/lib/dict/efindex",
68          pcollnextoff,  pcollprintentry,        pcollprintkey},
69
70         {"ge",          "Collins German->English",
71          "/lib/dict/ge",        "/lib/dict/geindex",
72          pcollgnextoff, pcollgprintentry,       pcollgprintkey},
73         {"eg",          "Collins English->German",
74          "/lib/dict/eg",        "/lib/dict/egindex",
75          pcollgnextoff, pcollgprintentry,       pcollgprintkey},
76
77         {"ie",          "Collins Italian->English",
78          "/lib/dict/ie",        "/lib/dict/ieindex",
79          pcollnextoff,  pcollprintentry,        pcollprintkey},
80         {"ei",          "Collins English->Italian",
81          "/lib/dict/ei",        "/lib/dict/eiindex",
82          pcollnextoff,  pcollprintentry,        pcollprintkey},
83
84         {"je",          "Sanshusha Japanese->English",
85          "/lib/dict/world/sansdata/sandic18.dat",
86          "/lib/dict/world/sansdata/jeindex",
87          worldnextoff,  worldprintentry,        worldprintkey},
88         {"jek",         "Sanshusha Japanese->English (Kanji index)",
89          "/lib/dict/world/sansdata/sandic18.dat",
90          "/lib/dict/world/sansdata/jekindex",
91          worldnextoff,  worldprintentry,        worldprintkey},
92         {"ej",          "Sanshusha English->Japanese",
93          "/lib/dict/world/sansdata/sandic18.dat",
94          "/lib/dict/world/sansdata/ejindex",
95          worldnextoff,  worldprintentry,        worldprintkey},
96
97         {"tjeg",        "Sanshusha technical Japanese->English,German",
98          "/lib/dict/world/sansdata/sandic16.dat",
99          "/lib/dict/world/sansdata/tjegindex",
100          worldnextoff,  worldprintentry,        worldprintkey},
101         {"tjegk",       "Sanshusha technical Japanese->English,German (Kanji index)",
102          "/lib/dict/world/sansdata/sandic16.dat",
103          "/lib/dict/world/sansdata/tjegkindex",
104          worldnextoff,  worldprintentry,        worldprintkey},
105         {"tegj",        "Sanshusha technical English->German,Japanese",
106          "/lib/dict/world/sansdata/sandic16.dat",
107          "/lib/dict/world/sansdata/tegjindex",
108          worldnextoff,  worldprintentry,        worldprintkey},
109         {"tgje",        "Sanshusha technical German->Japanese,English",
110          "/lib/dict/world/sansdata/sandic16.dat",
111          "/lib/dict/world/sansdata/tgjeindex",
112          worldnextoff,  worldprintentry,        worldprintkey},
113
114         {"ne",          "Kunnskapforlaget Norwegian->English",
115          "/lib/dict/world/kunndata/sandic28.dat",
116          "/lib/dict/world/kunndata/neindex",
117          worldnextoff,  worldprintentry,        worldprintkey},
118         {"en",          "Kunnskapforlaget English->Norwegian",
119          "/lib/dict/world/kunndata/sandic27.dat",
120          "/lib/dict/world/kunndata/enindex",
121          worldnextoff,  worldprintentry,        worldprintkey},
122
123         {"re",          "Leon Ungier Russian->English",
124          "/lib/dict/re",        "/lib/dict/reindex",
125          simplenextoff, simpleprintentry,       simpleprintkey},
126         {"er",          "Leon Ungier English->Russian",
127          "/lib/dict/re",        "/lib/dict/erindex",
128          simplenextoff, simpleprintentry,       simpleprintkey},
129
130         {"se",          "Collins Spanish->English",
131          "/lib/dict/se",        "/lib/dict/seindex",
132          pcollnextoff,  pcollprintentry,        pcollprintkey},
133         {"es",          "Collins English->Spanish",
134          "/lib/dict/es",        "/lib/dict/esindex",
135          pcollnextoff,  pcollprintentry,        pcollprintkey},
136
137         {"swe",         "Esselte Studium Swedish->English",
138          "/lib/dict/world/essedata/sandic34.dat",
139          "/lib/dict/world/essedata/sweindex",
140          worldnextoff,  worldprintentry,        worldprintkey},
141         {"esw",         "Esselte Studium English->Swedish",
142          "/lib/dict/world/essedata/sandic33.dat",
143          "/lib/dict/world/essedata/eswindex",
144          worldnextoff,  worldprintentry,        worldprintkey},
145
146         {"movie",       "Movies -- by title",
147          "/lib/movie/data",     "/lib/dict/movtindex",
148          movienextoff,  movieprintentry,        movieprintkey},
149         {"moviea",      "Movies -- by actor",
150          "/lib/movie/data",     "/lib/dict/movaindex",
151          movienextoff,  movieprintentry,        movieprintkey},
152         {"movied",      "Movies -- by director",
153          "/lib/movie/data",     "/lib/dict/movdindex",
154          movienextoff,  movieprintentry,        movieprintkey},
155
156         {"slang",       "English Slang",
157          "/lib/dict/slang",     "/lib/dict/slangindex",
158          slangnextoff,  slangprintentry,        slangprintkey},
159
160         {"robert",      "Robert Électronique",
161          "/lib/dict/robert/_pointers",  "/lib/dict/robert/_index",
162          robertnextoff, robertindexentry,       robertprintkey},
163         {"robertv",     "Robert Électronique - formes des verbes",
164          "/lib/dict/robert/flex.rob",   "/lib/dict/robert/_flexindex",
165          robertnextflex,        robertflexentry,        robertprintkey},
166
167         {0, 0, 0, 0, 0}
168 };
169
/*
 * One row of the accent table (ligtab): an accent and the
 * characters for which an accented form is listed.
 */
170 typedef struct Lig Lig;
171 struct Lig {
172         Rune    start;          /* the accent itself, as a rune */
173         Rune    *pairs;         /* rune string of <char, accented version> pairs */
174 };
175
/*
 * Accent table, indexed by accent code minus LIGS (see the
 * designators below).  Each row holds the accent's own rune and a
 * rune string made of <base, accented> pairs; liglookup() walks
 * that string two runes at a time.  Rows whose string is empty
 * list no accented forms.
 */
176 static Lig ligtab[Nligs] = {
177 [LACU-LIGS]     {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
178 [LGRV-LIGS]     {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
179 [LUML-LIGS]     {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
180 [LCED-LIGS]     {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
181 [LTIL-LIGS]     {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
182 [LBRV-LIGS]     {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
183 [LRNG-LIGS]     {L'˚', L"AÅaåUŮuů"},
184 [LDOT-LIGS]     {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
185 [LDTB-LIGS]     {L'.',  L""},
186 [LFRN-LIGS]     {L'⌢',        L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
187 [LFRB-LIGS]     {L'̯', L""},
188 [LOGO-LIGS]     {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"},
189 [LMAC-LIGS]     {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
190 [LHCK-LIGS]     {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
191 [LASP-LIGS]     {L'ʽ', L""},
192 [LLEN-LIGS]     {L'ʼ', L""},
193 [LBRB-LIGS]     {L'̮', L""}
194 };
195
/*
 * Rune strings for codes that expand to more than one rune,
 * indexed by code minus MULTI (see the designators below).
 * The table is not static, so it is presumably consulted by the
 * per-dictionary printing code in other files.
 */
196 Rune *multitab[Nmulti] = {
197 [MAAS-MULTI]    L"ʽα",
198 [MALN-MULTI]    L"ʼα",
199 [MAND-MULTI]    L"and",
200 [MAOQ-MULTI]    L"a/q",
201 [MBRA-MULTI]    L"<|",
202 [MDD-MULTI]     L"..",
203 [MDDD-MULTI]    L"...",
204 [MEAS-MULTI]    L"ʽε",
205 [MELN-MULTI]    L"ʼε",
206 [MEMM-MULTI]    L"——",
207 [MHAS-MULTI]    L"ʽη",
208 [MHLN-MULTI]    L"ʼη",
209 [MIAS-MULTI]    L"ʽι",
210 [MILN-MULTI]    L"ʼι",
211 [MLCT-MULTI]    L"ct",
212 [MLFF-MULTI]    L"ff",
213 [MLFFI-MULTI]   L"ffi",
214 [MLFFL-MULTI]   L"ffl",
215 [MLFL-MULTI]    L"fl",
216 [MLFI-MULTI]    L"fi",
217 [MLLS-MULTI]    L"ɫɫ",
218 [MLST-MULTI]    L"st",
219 [MOAS-MULTI]    L"ʽο",
220 [MOLN-MULTI]    L"ʼο",
221 [MOR-MULTI]     L"or",
222 [MRAS-MULTI]    L"ʽρ",
223 [MRLN-MULTI]    L"ʼρ",
224 [MTT-MULTI]     L"~~",
225 [MUAS-MULTI]    L"ʽυ",
226 [MULN-MULTI]    L"ʼυ",
227 [MWAS-MULTI]    L"ʽω",
228 [MWLN-MULTI]    L"ʼω",
229 [MOE-MULTI]     L"oe",
230 [MES-MULTI]     L"  ",
231 };
232
/*
 * Rune classification helpers used when folding:
 *	risupper  - r is an ASCII capital letter
 *	rislatin1 - r lies in the accented Latin-1 range 0xC0..0xFF
 *	rtolower  - ASCII capital to the matching small letter
 * Each macro may evaluate its argument more than once.
 */
#define risupper(r)	((r) >= L'A' && (r) <= L'Z')
#define rislatin1(r)	((r) >= 0xC0 && (r) <= 0xFF)
#define rtolower(r)	((r) + ('a' - 'A'))
236
237 static Rune latin_fold_tab[] =
238 {
239 /*      Table to fold latin 1 characters to ASCII equivalents
240                         based at Rune value 0xc0
241
242          À    Á    Â    Ã    Ä    Å    Æ    Ç
243          È    É    Ê    Ë    Ì    Í    Î    Ï
244          Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
245          Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
246          à    á    â    ã    ä    å    æ    ç
247          è    é    ê    ë    ì    í    î    ï
248          ð    ñ    ò    ó    ô    õ    ö    ÷
249          ø    ù    ú    û    ü    ý    þ    ÿ
250 */
251         'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
252         'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
253         'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
254         'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
255         'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
256         'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
257         'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
258         'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
259 };
260
/*
 * Stack of translation tables saved by changett();
 * ntt is the number of entries currently on the stack.
 */
261 static Rune     *ttabstack[20];
262 static int      ntt;
263
264 /*
265  * tab is an array of n Assoc's, sorted by key.
266  * Look for key in tab, and return corresponding val
267  * or -1 if not there
268  */
269 long
270 lookassoc(Assoc *tab, int n, char *key)
271 {
272         Assoc *q;
273         long i, low, high;
274         int r;
275
276         for(low = -1, high = n; high > low+1; ){
277                 i = (high+low)/2;
278                 q = &tab[i];
279                 if((r=strcmp(key, q->key))<0)
280                         high = i;
281                 else if(r == 0)
282                         return q->val;
283                 else
284                         low=i;
285         }
286         return -1;
287 }
288
289 long
290 looknassoc(Nassoc *tab, int n, long key)
291 {
292         Nassoc *q;
293         long i, low, high;
294
295         for(low = -1, high = n; high > low+1; ){
296                 i = (high+low)/2;
297                 q = &tab[i];
298                 if(key < q->key)
299                         high = i;
300                 else if(key == q->key)
301                         return q->val;
302                 else
303                         low=i;
304         }
305         return -1;
306 }
307
308 void
309 err(char *fmt, ...)
310 {
311         char buf[1000];
312         va_list v;
313
314         va_start(v, fmt);
315         vsnprint(buf, sizeof(buf), fmt, v);
316         va_end(v);
317         fprint(2, "%s: %s\n", argv0, buf);
318 }
319
320 /*
321  * Write the rune r to bout, keeping track of line length
322  * and breaking the lines (at blanks) when they get too long
323  */
324 void
325 outrune(long r)
326 {
327         if(outinhibit)
328                 return;
329         if(++linelen > breaklen && r == L' ') {
330                 Bputc(bout, '\n');
331                 linelen = 0;
332         } else
333                 Bputrune(bout, r);
334 }
335
336 void
337 outrunes(Rune *rp)
338 {
339         Rune r;
340
341         while((r = *rp++) != 0)
342                 outrune(r);
343 }
344
345 /* like outrune, but when arg is know to be a char */
346 void
347 outchar(int c)
348 {
349         if(outinhibit)
350                 return;
351         if(++linelen > breaklen && c == ' ') {
352                 c ='\n';
353                 linelen = 0;
354         }
355         Bputc(bout, c);
356 }
357
/*
 * Pass each byte of the zero-terminated string s to outchar.
 */
void
outchars(char *s)
{
	for(; *s != 0; s++)
		outchar(*s);
}
366
/*
 * Formatted output through outchars: the text is built
 * print-style from fmt and the remaining arguments in a
 * 1000-byte buffer, then emitted byte by byte so that the
 * line-length accounting in outchar applies.
 */
void
outprint(char *fmt, ...)
{
	va_list ap;
	char text[1000];

	va_start(ap, fmt);
	vsnprint(text, sizeof text, fmt, ap);
	va_end(ap);
	outchars(text);
}
378
/*
 * Output the bytes from b up to (not including) e via outchar.
 * Newlines are turned into blanks, and a blank that directly
 * follows another blank is dropped.
 */
void
outpiece(char *b, char *e)
{
	int cur, prev;

	for(prev = 0; b < e; b++){
		cur = *b;
		if(cur == '\n')
			cur = ' ';
		/* skip only the second and later blanks of a run */
		if(cur != ' ' || prev != ' ')
			outchar(cur);
		prev = cur;
	}
}
394
395 /*
396  * Go to new line if not already there; indent if ind != 0.
397  * If ind > 1, leave a blank line too.
398  * Slight hack: assume if current line is only one or two
399  * characters long, then they were spaces.
400  */
401 void
402 outnl(int ind)
403 {
404         if(outinhibit)
405                 return;
406         if(ind) {
407                 if(ind > 1) {
408                         if(linelen > 2)
409                                 Bputc(bout, '\n');
410                         Bprint(bout, "\n  ");
411                 } else if(linelen == 0)
412                         Bprint(bout, "  ");
413                 else if(linelen == 1)
414                         Bputc(bout, ' ');
415                 else if(linelen != 2)
416                         Bprint(bout, "\n  ");
417                 linelen = 2;
418         } else {
419                 if(linelen) {
420                         Bputc(bout, '\n');
421                         linelen = 0;
422                 }
423         }
424 }
425
426 /*
427  * Fold the runes in null-terminated rp.
428  * Use the sort(1) definition of folding (uppercase to lowercase,
429  * latin1-accented characters to corresponding unaccented chars)
430  */
431 void
432 fold(Rune *rp)
433 {
434         Rune r;
435
436         while((r = *rp) != 0) {
437                 if (rislatin1(r) && latin_fold_tab[r-0xc0])
438                                 r = latin_fold_tab[r-0xc0];
439                 if(risupper(r))
440                         r = rtolower(r);
441                 *rp++ = r;
442         }
443 }
444
445 /*
446  * Like fold, but put folded result into new
447  * (assumed to have enough space).
448  * old is a regular expression, but we know that
449  * metacharacters aren't affected
450  */
451 void
452 foldre(char *new, char *old)
453 {
454         Rune r;
455
456         while(*old) {
457                 old += chartorune(&r, old);
458                 if (rislatin1(r) && latin_fold_tab[r-0xc0])
459                                 r = latin_fold_tab[r-0xc0];
460                 if(risupper(r))
461                         r = rtolower(r);
462                 new += runetochar(new, &r);
463         }
464         *new = 0;
465 }
466
467 /*
468  *      acomp(s, t) returns:
469  *              -2 if s strictly precedes t
470  *              -1 if s is a prefix of t
471  *              0 if s is the same as t
472  *              1 if t is a prefix of s
473  *              2 if t strictly precedes s
474  */
475
476 int
477 acomp(Rune *s, Rune *t)
478 {
479         int cs, ct;
480
481         for(;;) {
482                 cs = *s;
483                 ct = *t;
484                 if(cs != ct)
485                         break;
486                 if(cs == 0)
487                         return 0;
488                 s++;
489                 t++;
490         }
491         if(cs == 0)
492                 return -1;
493         if(ct == 0)
494                 return 1;
495         if(cs < ct)
496                 return -2;
497         return 2;
498 }
499
500 /*
501  * Conversion of unsigned number to long, no overflow detection
502  */
503 long
504 runetol(Rune *r)
505 {
506         int c;
507         long n;
508
509         n = 0;
510         for(;; r++){
511                 c = *r;
512                 if(L'0'<=c && c<=L'9')
513                         c -= '0';
514                 else
515                         break;
516                 n = n*10 + c;
517         }
518         return n;
519 }
520
521 /*
522  * See if there is a rune corresponding to the accented
523  * version of r with accent acc (acc in [LIGS..LIGE-1]),
524  * and return it if so, else return NONE.
525  */
526 Rune
527 liglookup(Rune acc, Rune r)
528 {
529         Rune *p;
530
531         if(acc < LIGS || acc >= LIGE)
532                 return NONE;
533         for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
534                 if(*p == r)
535                         return *(p+1);
536         return NONE;
537 }
538
539 /*
540  * Maintain a translation table stack (a translation table
541  * is an array of Runes indexed by bytes or 7-bit bytes).
542  * If starting is true, push the curtab onto the stack
543  * and return newtab; else pop the top of the stack and
544  * return it.
545  * If curtab is 0, initialize the stack and return.
546  */
547 Rune *
548 changett(Rune *curtab, Rune *newtab, int starting)
549 {
550         if(curtab == 0) {
551                 ntt = 0;
552                 return 0;
553         }
554         if(starting) {
555                 if(ntt >= asize(ttabstack)) {
556                         if(debug)
557                                 err("translation stack overflow");
558                         return curtab;
559                 }
560                 ttabstack[ntt++] = curtab;
561                 return newtab;
562         } else {
563                 if(ntt == 0) {
564                         if(debug)
565                                 err("translation stack underflow");
566                         return curtab;
567                 }
568                 return ttabstack[--ntt];
569         }
570 }