7 {"oed", "Oxford English Dictionary, 2nd Ed.",
8 "/lib/dict/oed2", "/lib/dict/oed2index",
9 oednextoff, oedprintentry, oedprintkey},
10 {"ahd", "American Heritage Dictionary, 2nd College Ed.",
11 "/lib/ahd/DICT.DB", "/lib/ahd/index",
12 ahdnextoff, ahdprintentry, ahdprintkey},
13 {"pgw", "Project Gutenberg Webster Dictionary",
14 "/lib/dict/pgw", "/lib/dict/pgwindex",
15 pgwnextoff, pgwprintentry, pgwprintkey},
16 {"thesaurus", "Collins Thesaurus",
17 "/lib/dict/thesaurus", "/lib/dict/thesindex",
18 thesnextoff, thesprintentry, thesprintkey},
19 {"roget", "Project Gutenberg Roget's Thesaurus",
20 "/lib/dict/roget", "/lib/dict/rogetindex",
21 rogetnextoff, rogetprintentry, rogetprintkey},
23 {"ce", "Gendai Chinese->English",
24 "/lib/dict/world/sansdata/sandic24.dat",
25 "/lib/dict/world/sansdata/ceindex",
26 worldnextoff, worldprintentry, worldprintkey},
27 {"ceh", "Gendai Chinese->English (Hanzi index)",
28 "/lib/dict/world/sansdata/sandic24.dat",
29 "/lib/dict/world/sansdata/cehindex",
30 worldnextoff, worldprintentry, worldprintkey},
31 {"ec", "Gendai English->Chinese",
32 "/lib/dict/world/sansdata/sandic24.dat",
33 "/lib/dict/world/sansdata/ecindex",
34 worldnextoff, worldprintentry, worldprintkey},
36 {"dae", "Gyldendal Danish->English",
37 "/lib/dict/world/gylddata/sandic30.dat",
38 "/lib/dict/world/gylddata/daeindex",
39 worldnextoff, worldprintentry, worldprintkey},
40 {"eda", "Gyldendal English->Danish",
41 "/lib/dict/world/gylddata/sandic29.dat",
42 "/lib/dict/world/gylddata/edaindex",
43 worldnextoff, worldprintentry, worldprintkey},
45 {"due", "Wolters-Noordhoff Dutch->English",
46 "/lib/dict/world/woltdata/sandic07.dat",
47 "/lib/dict/world/woltdata/deindex",
48 worldnextoff, worldprintentry, worldprintkey},
49 {"edu", "Wolters-Noordhoff English->Dutch",
50 "/lib/dict/world/woltdata/sandic06.dat",
51 "/lib/dict/world/woltdata/edindex",
52 worldnextoff, worldprintentry, worldprintkey},
54 {"fie", "WSOY Finnish->English",
55 "/lib/dict/world/werndata/sandic32.dat",
56 "/lib/dict/world/werndata/fieindex",
57 worldnextoff, worldprintentry, worldprintkey},
58 {"efi", "WSOY English->Finnish",
59 "/lib/dict/world/werndata/sandic31.dat",
60 "/lib/dict/world/werndata/efiindex",
61 worldnextoff, worldprintentry, worldprintkey},
63 {"fe", "Collins French->English",
64 "/lib/dict/fe", "/lib/dict/feindex",
65 pcollnextoff, pcollprintentry, pcollprintkey},
66 {"ef", "Collins English->French",
67 "/lib/dict/ef", "/lib/dict/efindex",
68 pcollnextoff, pcollprintentry, pcollprintkey},
70 {"ge", "Collins German->English",
71 "/lib/dict/ge", "/lib/dict/geindex",
72 pcollgnextoff, pcollgprintentry, pcollgprintkey},
73 {"eg", "Collins English->German",
74 "/lib/dict/eg", "/lib/dict/egindex",
75 pcollgnextoff, pcollgprintentry, pcollgprintkey},
77 {"ie", "Collins Italian->English",
78 "/lib/dict/ie", "/lib/dict/ieindex",
79 pcollnextoff, pcollprintentry, pcollprintkey},
80 {"ei", "Collins English->Italian",
81 "/lib/dict/ei", "/lib/dict/eiindex",
82 pcollnextoff, pcollprintentry, pcollprintkey},
84 {"je", "Sanshusha Japanese->English",
85 "/lib/dict/world/sansdata/sandic18.dat",
86 "/lib/dict/world/sansdata/jeindex",
87 worldnextoff, worldprintentry, worldprintkey},
88 {"jek", "Sanshusha Japanese->English (Kanji index)",
89 "/lib/dict/world/sansdata/sandic18.dat",
90 "/lib/dict/world/sansdata/jekindex",
91 worldnextoff, worldprintentry, worldprintkey},
92 {"ej", "Sanshusha English->Japanese",
93 "/lib/dict/world/sansdata/sandic18.dat",
94 "/lib/dict/world/sansdata/ejindex",
95 worldnextoff, worldprintentry, worldprintkey},
97 {"tjeg", "Sanshusha technical Japanese->English,German",
98 "/lib/dict/world/sansdata/sandic16.dat",
99 "/lib/dict/world/sansdata/tjegindex",
100 worldnextoff, worldprintentry, worldprintkey},
101 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
102 "/lib/dict/world/sansdata/sandic16.dat",
103 "/lib/dict/world/sansdata/tjegkindex",
104 worldnextoff, worldprintentry, worldprintkey},
105 {"tegj", "Sanshusha technical English->German,Japanese",
106 "/lib/dict/world/sansdata/sandic16.dat",
107 "/lib/dict/world/sansdata/tegjindex",
108 worldnextoff, worldprintentry, worldprintkey},
109 {"tgje", "Sanshusha technical German->Japanese,English",
110 "/lib/dict/world/sansdata/sandic16.dat",
111 "/lib/dict/world/sansdata/tgjeindex",
112 worldnextoff, worldprintentry, worldprintkey},
114 {"ne", "Kunnskapforlaget Norwegian->English",
115 "/lib/dict/world/kunndata/sandic28.dat",
116 "/lib/dict/world/kunndata/neindex",
117 worldnextoff, worldprintentry, worldprintkey},
118 {"en", "Kunnskapforlaget English->Norwegian",
119 "/lib/dict/world/kunndata/sandic27.dat",
120 "/lib/dict/world/kunndata/enindex",
121 worldnextoff, worldprintentry, worldprintkey},
123 {"re", "Leon Ungier Russian->English",
124 "/lib/dict/re", "/lib/dict/reindex",
125 simplenextoff, simpleprintentry, simpleprintkey},
126 {"er", "Leon Ungier English->Russian",
127 "/lib/dict/re", "/lib/dict/erindex",
128 simplenextoff, simpleprintentry, simpleprintkey},
130 {"se", "Collins Spanish->English",
131 "/lib/dict/se", "/lib/dict/seindex",
132 pcollnextoff, pcollprintentry, pcollprintkey},
133 {"es", "Collins English->Spanish",
134 "/lib/dict/es", "/lib/dict/esindex",
135 pcollnextoff, pcollprintentry, pcollprintkey},
137 {"swe", "Esselte Studium Swedish->English",
138 "/lib/dict/world/essedata/sandic34.dat",
139 "/lib/dict/world/essedata/sweindex",
140 worldnextoff, worldprintentry, worldprintkey},
141 {"esw", "Esselte Studium English->Swedish",
142 "/lib/dict/world/essedata/sandic33.dat",
143 "/lib/dict/world/essedata/eswindex",
144 worldnextoff, worldprintentry, worldprintkey},
146 {"movie", "Movies -- by title",
147 "/lib/movie/data", "/lib/dict/movtindex",
148 movienextoff, movieprintentry, movieprintkey},
149 {"moviea", "Movies -- by actor",
150 "/lib/movie/data", "/lib/dict/movaindex",
151 movienextoff, movieprintentry, movieprintkey},
152 {"movied", "Movies -- by director",
153 "/lib/movie/data", "/lib/dict/movdindex",
154 movienextoff, movieprintentry, movieprintkey},
156 {"slang", "English Slang",
157 "/lib/dict/slang", "/lib/dict/slangindex",
158 slangnextoff, slangprintentry, slangprintkey},
160 {"robert", "Robert Électronique",
161 "/lib/dict/robert/_pointers", "/lib/dict/robert/_index",
162 robertnextoff, robertindexentry, robertprintkey},
163 {"robertv", "Robert Électronique - formes des verbes",
164 "/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex",
165 robertnextflex, robertflexentry, robertprintkey},
170 typedef struct Lig Lig;
172 Rune start; /* accent rune */
173 Rune *pairs; /* <char,accented version> pairs */
176 static Lig ligtab[Nligs] = {
177 [LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
178 [LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
179 [LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
180 [LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
181 [LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
182 [LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
183 [LRNG-LIGS] {L'˚', L"AÅaåUŮuů"},
184 [LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
185 [LDTB-LIGS] {L'.', L""},
186 [LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
187 [LFRB-LIGS] {L'̯', L""},
188 [LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"},
189 [LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
190 [LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
191 [LASP-LIGS] {L'ʽ', L""},
192 [LLEN-LIGS] {L'ʼ', L""},
193 [LBRB-LIGS] {L'̮', L""}
196 Rune *multitab[Nmulti] = {
213 [MLFFI-MULTI] L"ffi",
214 [MLFFL-MULTI] L"ffl",
233 #define risupper(r) (L'A' <= (r) && (r) <= L'Z') /* true iff r lies in the ASCII range 'A'..'Z'; note r is evaluated twice */
234 #define rislatin1(r) (0xC0 <= (r) && (r) <= 0xFF) /* true iff r lies in 0xC0..0xFF; note r is evaluated twice */
235 #define rtolower(r) ((r)-'A'+'a') /* shifts r by the 'A'-to-'a' offset; performs no range check, so only meaningful when risupper(r) holds */
237 static Rune latin_fold_tab[] =
239 /* Table to fold latin 1 characters to ASCII equivalents
240 based at Rune value 0xc0
251 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
252 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
253 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
254 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
255 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
256 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
257 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
258 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
261 static Rune *ttabstack[20];
265 * tab is an array of n Assoc's, sorted by key.
266 * Look for key in tab, and return corresponding val
270 lookassoc(Assoc *tab, int n, char *key)
276 for(low = -1, high = n; high > low+1; ){
279 if((r=strcmp(key, q->key))<0)
290 looknassoc(Nassoc *tab, int n, long key)
295 for(low = -1, high = n; high > low+1; ){
300 else if(key == q->key)
315 vsnprint(buf, sizeof(buf), fmt, v);
317 fprint(2, "%s: %s\n", argv0, buf);
321 * Write the rune r to bout, keeping track of line length
322 * and breaking the lines (at blanks) when they get too long
329 if(++linelen > breaklen && r == L' ') {
341 while((r = *rp++) != 0)
345 /* like outrune, but when arg is known to be a char */
351 if(++linelen > breaklen && c == ' ') {
363 while((c = *s++) != 0)
368 outprint(char *fmt, ...)
374 vsnprint(buf, sizeof(buf), fmt, v);
380 outpiece(char *b, char *e)
389 if(!(c == ' ' && lastc == ' '))
396 * Go to new line if not already there; indent if ind != 0.
397 * If ind > 1, leave a blank line too.
398 * Slight hack: assume if current line is only one or two
399 * characters long, then they were spaces.
411 } else if(linelen == 0)
413 else if(linelen == 1)
415 else if(linelen != 2)
427 * Fold the runes in null-terminated rp.
428 * Use the sort(1) definition of folding (uppercase to lowercase,
429 * latin1-accented characters to corresponding unaccented chars)
436 while((r = *rp) != 0) {
437 if (rislatin1(r) && latin_fold_tab[r-0xc0])
438 r = latin_fold_tab[r-0xc0];
446 * Like fold, but put folded result into new
447 * (assumed to have enough space).
448 * old is a regular expression, but we know that
449 * metacharacters aren't affected
452 foldre(char *new, char *old)
457 old += chartorune(&r, old);
458 if (rislatin1(r) && latin_fold_tab[r-0xc0])
459 r = latin_fold_tab[r-0xc0];
462 new += runetochar(new, &r);
468 * acomp(s, t) returns:
469 * -2 if s strictly precedes t
470 * -1 if s is a prefix of t
471 * 0 if s is the same as t
472 * 1 if t is a prefix of s
473 * 2 if t strictly precedes s
477 acomp(Rune *s, Rune *t)
501 * Conversion of unsigned number to long, no overflow detection
512 if(L'0'<=c && c<=L'9')
522 * See if there is a rune corresponding to the accented
523 * version of r with accent acc (acc in [LIGS..LIGE-1]),
524 * and return it if so, else return NONE.
527 liglookup(Rune acc, Rune r)
531 if(acc < LIGS || acc >= LIGE)
533 for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
540 * Maintain a translation table stack (a translation table
541 * is an array of Runes indexed by bytes or 7-bit bytes).
542 * If starting is true, push the curtab onto the stack
543 * and return newtab; else pop the top of the stack and
545 * If curtab is 0, initialize the stack and return.
548 changett(Rune *curtab, Rune *newtab, int starting)
555 if(ntt >= asize(ttabstack)) {
557 err("translation stack overflow");
560 ttabstack[ntt++] = curtab;
565 err("translation stack underflow");
568 return ttabstack[--ntt];