]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/dict/oed.c
realemu: implement IDIV, mark 0xE0000 writeable, fix DIV overfow trap
[plan9front.git] / sys / src / cmd / dict / oed.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5
/*
 * File-wide size limits.  Their users are outside this part of the
 * file; judging by the names, Buflen is a working-buffer length and
 * Maxaux caps the aux items kept per tag -- TODO confirm.
 */
enum {
	Buflen	= 1000,
	Maxaux	= 5,
};
10
/*
 * Tag codes, one per markup tag known to this file.
 * Values are positional (A is 0, Ntag is the count), so keep the
 * order as it is; note that it is not strictly alphabetical
 * (Fq comes before Form, Xs before Xr).
 */
enum {
	A = 0,	/* quotation author (small caps) */
	B,	/* bold */
	Ba,	/* author within a bib */
	Bch,	/* built-up chem component */
	Bib,	/* wraps the word 'in' of a bibliographic ref */
	Bl,	/* bold */
	Bo,	/* bond over */
	Bu,	/* bond under */

	Cb,	/* ? block of stuff (indent) */
	Cf,	/* cross ref to another entry (italics) */
	Chem,	/* chemistry formula */
	Co,	/* over (preceding sum, integral, etc.) */
	Col,	/* table column (aux just may be r) */
	Cu,	/* under (preceding sum, integral, etc.) */

	Dat,	/* date */
	Db,	/* def block? indent */
	Dn,	/* fraction denominator */

	E,	/* main entry */
	Ed,	/* editor's comments (in [...]) */
	Etym,	/* etymology (in [...]) */

	Fq,	/* frequency count (superscript) */
	Form,	/* formula */
	Fr,	/* fraction (holds <nu>, then <dn>) */

	Gk,	/* greek (transliteration) */
	Gr,	/* grammar? (e.g., around 'pa.' in 'pa. pple.') */

	Hg,	/* headword group */
	Hm,	/* homonym (superscript) */
	Hw,	/* headword (bold) */

	I,	/* italics */
	Il,	/* italic list? */
	In,	/* inferior (subscript) */

	L,	/* row of col of table */
	La,	/* status or usage label (italic) */
	Lc,	/* chapter/verse sort of thing for works */

	N,	/* note (smaller type) */
	Nu,	/* fraction numerator */

	Ov,	/* needs overline */

	P,	/* paragraph (indent) */
	Ph,	/* pronunciation (transliteration) */
	Pi,	/* pile (frac without line) */
	Pqp,	/* subblock of quote */
	Pr,	/* pronunciation (in (...)) */
	Ps,	/* position (e.g., adv.) (italic) */
	Pt,	/* part (in lc) */

	Q,	/* quote in quote block */
	Qd,	/* quote date (bold) */
	Qig,	/* quote number (greek) */
	Qla,	/* status or usage label in quote (italic) */
	Qp,	/* quote block (small type, indent) */
	Qsn,	/* quote number */
	Qt,	/* quote words */

	R,	/* roman type style */
	Rx,	/* relative cross reference (e.g., next) */

	S,	/* another form? (italic) */
	S0,	/* sense (sometimes surrounds several sx's) */
	S1,	/* sense (aux num: indented bold letter) */
	S2,	/* sense (aux num: indented bold capital rom num) */
	S3,	/* sense (aux num: indented number of asterisks) */
	S4,	/* sense (aux num: indented bold number) */
	S5,	/* sense (aux num: indented number of asterisks) */
	S6,	/* subsense (aux num: bold letter) */
	S7a,	/* subsense (aux num: letter) */
	S7n,	/* subsense (aux num: roman numeral) */
	Sc,	/* small caps */
	Sgk,	/* subsense (aux num: transliterated greek) */
	Sn,	/* sense of subdefinition (aux num: roman letter) */
	Ss,	/* sans serif */
	Ssb,	/* sans serif bold */
	Ssi,	/* sans serif italic */
	Su,	/* superior (superscript) */
	Sub,	/* subdefinition */

	Table,	/* table (aux cols=number of columns) */
	Tt,	/* title? (italics) */

	Vd,	/* numeric label for variant form */
	Ve,	/* variant entry */
	Vf,	/* variant form (light bold) */
	Vfl,	/* list of vf's (starts with Also or Forms) */

	W,	/* work (e.g., Beowulf) (italics) */

	X,	/* cross reference to main word (small caps) */
	Xd,	/* cross reference to quotation by date */
	Xi,	/* internal cross reference ? (italic) */
	Xid,	/* cross reference identifier, in quote ? */
	Xs,	/* cross reference sense (lower number) */
	Xr,	/* list of x's */

	Ntag	/* end of tags; equals the number of tags above */
};
99
100 /* Assoc tables must be sorted on first field */
101
102 static Assoc tagtab[] = {
103         {"a",           A},
104         {"b",           B},
105         {"ba",          Ba},
106         {"bch",         Bch},
107         {"bib",         Bib},
108         {"bl",          Bl},
109         {"bo",          Bo},
110         {"bu",          Bu},
111         {"cb",          Cb},
112         {"cf",          Cf},
113         {"chem",        Chem},
114         {"co",          Co},
115         {"col",         Col},
116         {"cu",          Cu},
117         {"dat",         Dat},
118         {"db",          Db},
119         {"dn",          Dn},
120         {"e",           E},
121         {"ed",          Ed},
122         {"et",          Etym},
123         {"etym",        Etym},
124         {"form",        Form},
125         {"fq",          Fq},
126         {"fr",          Fr},
127         {"frac",        Fr},
128         {"gk",          Gk},
129         {"gr",          Gr},
130         {"hg",          Hg},
131         {"hm",          Hm},
132         {"hw",          Hw},
133         {"i",           I},
134         {"il",          Il},
135         {"in",          In},
136         {"l",           L},
137         {"la",          La},
138         {"lc",          Lc},
139         {"n",           N},
140         {"nu",          Nu},
141         {"ov",          Ov},
142         {"p",           P},
143         {"ph",          Ph},
144         {"pi",          Pi},
145         {"pqp",         Pqp},
146         {"pr",          Pr},
147         {"ps",          Ps},
148         {"pt",          Pt},
149         {"q",           Q},
150         {"qd",          Qd},
151         {"qig",         Qig},
152         {"qla",         Qla},
153         {"qp",          Qp},
154         {"qsn",         Qsn},
155         {"qt",          Qt},
156         {"r",           R},
157         {"rx",          Rx},
158         {"s",           S},
159         {"s0",          S0},
160         {"s1",          S1},
161         {"s2",          S2},
162         {"s3",          S3},
163         {"s4",          S4},
164         {"s5",          S5},
165         {"s6",          S6},
166         {"s7a",         S7a},
167         {"s7n",         S7n},
168         {"sc",          Sc},
169         {"sgk",         Sgk},
170         {"sn",          Sn},
171         {"ss",          Ss,},
172         {"ssb",         Ssb},
173         {"ssi",         Ssi},
174         {"su",          Su},
175         {"sub",         Sub},
176         {"table",       Table},
177         {"tt",          Tt},
178         {"vd",          Vd},
179         {"ve",          Ve},
180         {"vf",          Vf},
181         {"vfl",         Vfl},
182         {"w",           W},
183         {"x",           X},
184         {"xd",          Xd},
185         {"xi",          Xi},
186         {"xid",         Xid},
187         {"xr",          Xr},
188         {"xs",          Xs},
189 };
190
/* Kinds of auxiliary info a tag can carry */
enum {
	Cols = 0,	/* table: number of columns */
	Num,		/* sense: its letter or number */
	St,		/* status (e.g., obs) */

	Naux		/* count of the aux kinds above */
};
198
199 static Assoc auxtab[] = {
200         {"cols",        Cols},
201         {"num",         Num},
202         {"st",          St}
203 };
204
205 static Assoc spectab[] = {
206         {"3on4",        L'¾'},
207         {"3on8",        L'⅜'},
208         {"Aacu",        L'Á'},
209         {"Aang",        L'Å'},
210         {"Abarab",      L'Ā'},
211         {"Acirc",       L'Â'},
212         {"Ae",          L'Æ'},
213         {"Agrave",      L'À'},
214         {"Alpha",       L'Α'},
215         {"Amac",        L'Ā'},
216         {"Asg",         L'Ʒ'},         /* Unicyle. Cf "Sake" */
217         {"Auml",        L'Ä'},
218         {"Beta",        L'Β'},
219         {"Cced",        L'Ç'},
220         {"Chacek",      L'Č'},
221         {"Chi",         L'Χ'},
222         {"Chirho",      L'☧'},                /* Chi Rho U+2627 */
223         {"Csigma",      L'Ϛ'},
224         {"Delta",       L'Δ'},
225         {"Eacu",        L'É'},
226         {"Ecirc",       L'Ê'},
227         {"Edh",         L'Ð'},
228         {"Epsilon",     L'Ε'},
229         {"Eta",         L'Η'},
230         {"Gamma",       L'Γ'},
231         {"Iacu",        L'Í'},
232         {"Icirc",       L'Î'},
233         {"Imac",        L'Ī'},
234         {"Integ",       L'∫'},
235         {"Iota",        L'Ι'},
236         {"Kappa",       L'Κ'},
237         {"Koppa",       L'Ϟ'},
238         {"Lambda",      L'Λ'},
239         {"Lbar",        L'Ł'},
240         {"Mu",          L'Μ'},
241         {"Naira",       L'₦'},                /* should have bar through */
242         {"Nplus",       L'N'},          /* should have plus above */
243         {"Ntilde",      L'Ñ'},
244         {"Nu",          L'Ν'},
245         {"Oacu",        L'Ó'},
246         {"Obar",        L'Ø'},
247         {"Ocirc",       L'Ô'},
248         {"Oe",          L'Œ'},
249         {"Omega",       L'Ω'},
250         {"Omicron",     L'Ο'},
251         {"Ouml",        L'Ö'},
252         {"Phi",         L'Φ'},
253         {"Pi",          L'Π'},
254         {"Psi",         L'Ψ'},
255         {"Rho",         L'Ρ'},
256         {"Sacu",        L'Ś'},
257         {"Sigma",       L'Σ'},
258         {"Summ",        L'∑'},
259         {"Tau",         L'Τ'},
260         {"Th",          L'Þ'},
261         {"Theta",       L'Θ'},
262         {"Tse",         L'Ц'},
263         {"Uacu",        L'Ú'},
264         {"Ucirc",       L'Û'},
265         {"Upsilon",     L'Υ'},
266         {"Uuml",        L'Ü'},
267         {"Wyn",         L'ƿ'},         /* wynn U+01BF */
268         {"Xi",          L'Ξ'},
269         {"Ygh",         L'Ʒ'},         /* Yogh U+01B7 */
270         {"Zeta",        L'Ζ'},
271         {"Zh",          L'Ʒ'},         /* looks like Yogh. Cf "Sake" */
272         {"a",           L'a'},          /* ante */
273         {"aacu",        L'á'},
274         {"aang",        L'å'},
275         {"aasper",      MAAS},
276         {"abreve",      L'ă'},
277         {"acirc",       L'â'},
278         {"acu",         LACU},
279         {"ae",          L'æ'},
280         {"agrave",      L'à'},
281         {"ahook",       L'ą'},
282         {"alenis",      MALN},
283         {"alpha",       L'α'},
284         {"amac",        L'ā'},
285         {"amp",         L'&'},
286         {"and",         MAND},
287         {"ang",         LRNG},
288         {"angle",       L'∠'},
289         {"ankh",        L'☥'},                /* ankh U+2625 */
290         {"ante",        L'a'},          /* before (year) */
291         {"aonq",        MAOQ},
292         {"appreq",      L'≃'},
293         {"aquar",       L'♒'},
294         {"arDadfull",   L'ض'},         /* Dad U+0636 */
295         {"arHa",        L'ح'},         /* haa U+062D */
296         {"arTa",        L'ت'},         /* taa U+062A */
297         {"arain",       L'ع'},         /* ain U+0639 */
298         {"arainfull",   L'ع'},         /* ain U+0639 */
299         {"aralif",      L'ا'},         /* alef U+0627 */
300         {"arba",        L'ب'},         /* baa U+0628 */
301         {"arha",        L'ه'},         /* ha U+0647 */
302         {"aries",       L'♈'},
303         {"arnun",       L'ن'},         /* noon U+0646 */
304         {"arnunfull",   L'ن'},         /* noon U+0646 */
305         {"arpa",        L'ه'},         /* ha U+0647 */
306         {"arqoph",      L'ق'},         /* qaf U+0642 */
307         {"arshinfull",  L'ش'},         /* sheen U+0634 */
308         {"arta",        L'ت'},         /* taa U+062A */
309         {"artafull",    L'ت'},         /* taa U+062A */
310         {"artha",       L'ث'},         /* thaa U+062B */
311         {"arwaw",       L'و'},         /* waw U+0648 */
312         {"arya",        L'ي'},         /* ya U+064A */
313         {"aryafull",    L'ي'},         /* ya U+064A */
314         {"arzero",      L'٠'},         /* indic zero U+0660 */
315         {"asg",         L'ʒ'},         /* unicycle character. Cf "hallow" */
316         {"asper",       LASP},
317         {"assert",      L'⊢'},
318         {"astm",        L'⁂'},                /* asterism: should be upside down */
319         {"at",          L'@'},
320         {"atilde",      L'ã'},
321         {"auml",        L'ä'},
322         {"ayin",        L'ع'},         /* arabic ain U+0639 */
323         {"b1",          L'-'},          /* single bond */
324         {"b2",          L'='},          /* double bond */
325         {"b3",          L'≡'},                /* triple bond */
326         {"bbar",        L'ƀ'},         /* b with bar U+0180 */
327         {"beta",        L'β'},
328         {"bigobl",      L'/'},
329         {"blC",         L'C'},          /* should be black letter */
330         {"blJ",         L'J'},          /* should be black letter */
331         {"blU",         L'U'},          /* should be black letter */
332         {"blb",         L'b'},          /* should be black letter */
333         {"blozenge",    L'◊'},                /* U+25CA; should be black */
334         {"bly",         L'y'},          /* should be black letter */
335         {"bra",         MBRA},
336         {"brbl",        LBRB},
337         {"breve",       LBRV},
338         {"bslash",      L'\\'},
339         {"bsquare",     L'■'},                /* black square U+25A0 */
340         {"btril",       L'◀'},                /* U+25C0 */
341         {"btrir",       L'▶'},                /* U+25B6 */
342         {"c",           L'c'},          /* circa */
343         {"cab",         L'〉'},
344         {"cacu",        L'ć'},
345         {"canc",        L'♋'},
346         {"capr",        L'♑'},
347         {"caret",       L'^'},
348         {"cb",          L'}'},
349         {"cbigb",       L'}'},
350         {"cbigpren",    L')'},
351         {"cbigsb",      L']'},
352         {"cced",        L'ç'},
353         {"cdil",        LCED},
354         {"cdsb",        L'〛'},                /* ]] U+301b */
355         {"cent",        L'¢'},
356         {"chacek",      L'č'},
357         {"chi",         L'χ'},
358         {"circ",        LRNG},
359         {"circa",       L'c'},          /* about (year) */
360         {"circbl",      L'̥'},         /* ring below accent U+0325 */
361         {"circle",      L'○'},                /* U+25CB */
362         {"circledot",   L'⊙'},
363         {"click",       L'ʖ'},
364         {"club",        L'♣'},
365         {"comtime",     L'C'},
366         {"conj",        L'☌'},
367         {"cprt",        L'©'},
368         {"cq",          L'\''},
369         {"cqq",         L'”'},
370         {"cross",       L'✠'},                /* maltese cross U+2720 */
371         {"crotchet",    L'♩'},
372         {"csb",         L']'},
373         {"ctilde",      L'c'},          /* +tilde */
374         {"ctlig",       MLCT},
375         {"cyra",        L'а'},
376         {"cyre",        L'е'},
377         {"cyrhard",     L'ъ'},
378         {"cyrjat",      L'ѣ'},
379         {"cyrm",        L'м'},
380         {"cyrn",        L'н'},
381         {"cyrr",        L'р'},
382         {"cyrsoft",     L'ь'},
383         {"cyrt",        L'т'},
384         {"cyry",        L'ы'},
385         {"dag",         L'†'},
386         {"dbar",        L'đ'},
387         {"dblar",       L'⇋'},
388         {"dblgt",       L'≫'},
389         {"dbllt",       L'≪'},
390         {"dced",        L'd'},          /* +cedilla */
391         {"dd",          MDD},
392         {"ddag",        L'‡'},
393         {"ddd",         MDDD},
394         {"decr",        L'↓'},
395         {"deg",         L'°'},
396         {"dele",        L'd'},          /* should be dele */
397         {"delta",       L'δ'},
398         {"descnode",    L'☋'},                /* descending node U+260B */
399         {"diamond",     L'♢'},
400         {"digamma",     L'ϝ'},
401         {"div",         L'÷'},
402         {"dlessi",      L'ı'},
403         {"dlessj1",     L'j'},          /* should be dotless */
404         {"dlessj2",     L'j'},          /* should be dotless */
405         {"dlessj3",     L'j'},          /* should be dotless */
406         {"dollar",      L'$'},
407         {"dotab",       LDOT},
408         {"dotbl",       LDTB},
409         {"drachm",      L'ʒ'},
410         {"dubh",        L'-'},
411         {"eacu",        L'é'},
412         {"earth",       L'♁'},
413         {"easper",      MEAS},
414         {"ebreve",      L'ĕ'},
415         {"ecirc",       L'ê'},
416         {"edh",         L'ð'},
417         {"egrave",      L'è'},
418         {"ehacek",      L'ě'},
419         {"ehook",       L'ę'},
420         {"elem",        L'∊'},
421         {"elenis",      MELN},
422         {"em",          L'—'},
423         {"emac",        L'ē'},
424         {"emem",        MEMM},
425         {"en",          L'–'},
426         {"epsilon",     L'ε'},
427         {"equil",       L'⇋'},
428         {"ergo",        L'∴'},
429         {"es",          MES},
430         {"eszett",      L'ß'},
431         {"eta",         L'η'},
432         {"eth",         L'ð'},
433         {"euml",        L'ë'},
434         {"expon",       L'↑'},
435         {"fact",        L'!'},
436         {"fata",        L'ɑ'},
437         {"fatpara",     L'¶'},         /* should have fatter, filled in bowl */
438         {"female",      L'♀'},
439         {"ffilig",      MLFFI},
440         {"fflig",       MLFF},
441         {"ffllig",      MLFFL},
442         {"filig",       MLFI},
443         {"flat",        L'♭'},
444         {"fllig",       MLFL},
445         {"frE",         L'E'},          /* should be curly */
446         {"frL",         L'L'},          /* should be curly */
447         {"frR",         L'R'},          /* should be curly */
448         {"frakB",       L'B'},          /* should have fraktur style */
449         {"frakG",       L'G'},
450         {"frakH",       L'H'},
451         {"frakI",       L'I'},
452         {"frakM",       L'M'},
453         {"frakU",       L'U'},
454         {"frakX",       L'X'},
455         {"frakY",       L'Y'},
456         {"frakh",       L'h'},
457         {"frbl",        LFRB},
458         {"frown",       LFRN},
459         {"fs",          L' '},
460         {"fsigma",      L'ς'},
461         {"gAacu",       L'Á'},         /* should be Α+acute */
462         {"gaacu",       L'α'},         /* +acute */
463         {"gabreve",     L'α'},         /* +breve */
464         {"gafrown",     L'α'},         /* +frown */
465         {"gagrave",     L'α'},         /* +grave */
466         {"gamac",       L'α'},         /* +macron */
467         {"gamma",       L'γ'},
468         {"gauml",       L'α'},         /* +umlaut */
469         {"ge",          L'≧'},
470         {"geacu",       L'ε'},         /* +acute */
471         {"gegrave",     L'ε'},         /* +grave */
472         {"ghacu",       L'η'},         /* +acute */
473         {"ghfrown",     L'η'},         /* +frown */
474         {"ghgrave",     L'η'},         /* +grave */
475         {"ghmac",       L'η'},         /* +macron */
476         {"giacu",       L'ι'},         /* +acute */
477         {"gibreve",     L'ι'},         /* +breve */
478         {"gifrown",     L'ι'},         /* +frown */
479         {"gigrave",     L'ι'},         /* +grave */
480         {"gimac",       L'ι'},         /* +macron */
481         {"giuml",       L'ι'},         /* +umlaut */
482         {"glagjat",     L'ѧ'},
483         {"glots",       L'ˀ'},
484         {"goacu",       L'ο'},         /* +acute */
485         {"gobreve",     L'ο'},         /* +breve */
486         {"grave",       LGRV},
487         {"gt",          L'>'},
488         {"guacu",       L'υ'},         /* +acute */
489         {"gufrown",     L'υ'},         /* +frown */
490         {"gugrave",     L'υ'},         /* +grave */
491         {"gumac",       L'υ'},         /* +macron */
492         {"guuml",       L'υ'},         /* +umlaut */
493         {"gwacu",       L'ω'},         /* +acute */
494         {"gwfrown",     L'ω'},         /* +frown */
495         {"gwgrave",     L'ω'},         /* +grave */
496         {"hacek",       LHCK},
497         {"halft",       L'⌈'},
498         {"hash",        L'#'},
499         {"hasper",      MHAS},
500         {"hatpath",     L'ֲ'},         /* hataf patah U+05B2 */
501         {"hatqam",      L'ֳ'},         /* hataf qamats U+05B3 */
502         {"hatseg",      L'ֱ'},         /* hataf segol U+05B1 */
503         {"hbar",        L'ħ'},
504         {"heart",       L'♡'},
505         {"hebaleph",    L'א'},         /* aleph U+05D0 */
506         {"hebayin",     L'ע'},         /* ayin U+05E2 */
507         {"hebbet",      L'ב'},         /* bet U+05D1 */
508         {"hebbeth",     L'ב'},         /* bet U+05D1 */
509         {"hebcheth",    L'ח'},         /* bet U+05D7 */
510         {"hebdaleth",   L'ד'},         /* dalet U+05D3 */
511         {"hebgimel",    L'ג'},         /* gimel U+05D2 */
512         {"hebhe",       L'ה'},         /* he U+05D4 */
513         {"hebkaph",     L'כ'},         /* kaf U+05DB */
514         {"heblamed",    L'ל'},         /* lamed U+05DC */
515         {"hebmem",      L'מ'},         /* mem U+05DE */
516         {"hebnun",      L'נ'},         /* nun U+05E0 */
517         {"hebnunfin",   L'ן'},         /* final nun U+05DF */
518         {"hebpe",       L'פ'},         /* pe U+05E4 */
519         {"hebpedag",    L'ף'},         /* final pe? U+05E3 */
520         {"hebqoph",     L'ק'},         /* qof U+05E7 */
521         {"hebresh",     L'ר'},         /* resh U+05E8 */
522         {"hebshin",     L'ש'},         /* shin U+05E9 */
523         {"hebtav",      L'ת'},         /* tav U+05EA */
524         {"hebtsade",    L'צ'},         /* tsadi U+05E6 */
525         {"hebwaw",      L'ו'},         /* vav? U+05D5 */
526         {"hebyod",      L'י'},         /* yod U+05D9 */
527         {"hebzayin",    L'ז'},         /* zayin U+05D6 */
528         {"hgz",         L'ʒ'},         /* ??? Cf "alet" */
529         {"hireq",       L'ִ'},         /* U+05B4 */
530         {"hlenis",      MHLN},
531         {"hook",        LOGO},
532         {"horizE",      L'E'},          /* should be on side */
533         {"horizP",      L'P'},          /* should be on side */
534         {"horizS",      L'∽'},
535         {"horizT",      L'⊣'},
536         {"horizb",      L'{'},          /* should be underbrace */
537         {"ia",          L'α'},
538         {"iacu",        L'í'},
539         {"iasper",      MIAS},
540         {"ib",          L'β'},
541         {"ibar",        L'ɨ'},
542         {"ibreve",      L'ĭ'},
543         {"icirc",       L'î'},
544         {"id",          L'δ'},
545         {"ident",       L'≡'},
546         {"ie",          L'ε'},
547         {"ifilig",      MLFI},
548         {"ifflig",      MLFF},
549         {"ig",          L'γ'},
550         {"igrave",      L'ì'},
551         {"ih",          L'η'},
552         {"ii",          L'ι'},
553         {"ik",          L'κ'},
554         {"ilenis",      MILN},
555         {"imac",        L'ī'},
556         {"implies",     L'⇒'},
557         {"index",       L'☞'},
558         {"infin",       L'∞'},
559         {"integ",       L'∫'},
560         {"intsec",      L'∩'},
561         {"invpri",      L'ˏ'},
562         {"iota",        L'ι'},
563         {"iq",          L'ψ'},
564         {"istlig",      MLST},
565         {"isub",        L'ϵ'},         /* iota below accent */
566         {"iuml",        L'ï'},
567         {"iz",          L'ζ'},
568         {"jup",         L'♃'},
569         {"kappa",       L'κ'},
570         {"koppa",       L'ϟ'},
571         {"lambda",      L'λ'},
572         {"lar",         L'←'},
573         {"lbar",        L'ł'},
574         {"le",          L'≦'},
575         {"lenis",       LLEN},
576         {"leo",         L'♌'},
577         {"lhalfbr",     L'⌈'},
578         {"lhshoe",      L'⊃'},
579         {"libra",       L'♎'},
580         {"llswing",     MLLS},
581         {"lm",          L'ː'},
582         {"logicand",    L'∧'},
583         {"logicor",     L'∨'},
584         {"longs",       L'ʃ'},
585         {"lrar",        L'↔'},
586         {"lt",          L'<'},
587         {"ltappr",      L'≾'},
588         {"ltflat",      L'∠'},
589         {"lumlbl",      L'l'},          /* +umlaut below */
590         {"mac",         LMAC},
591         {"male",        L'♂'},
592         {"mc",          L'c'},          /* should be raised */
593         {"merc",        L'☿'},                /* mercury U+263F */
594         {"min",         L'−'},
595         {"moonfq",      L'☽'},                /* first quarter moon U+263D */
596         {"moonlq",      L'☾'},                /* last quarter moon U+263E */
597         {"msylab",      L'm'},          /* +sylab (ˌ) */
598         {"mu",          L'μ'},
599         {"nacu",        L'ń'},
600         {"natural",     L'♮'},
601         {"neq",         L'≠'},
602         {"nfacu",       L'′'},
603         {"nfasper",     L'ʽ'},
604         {"nfbreve",     L'˘'},
605         {"nfced",       L'¸'},
606         {"nfcirc",      L'ˆ'},
607         {"nffrown",     L'⌢'},
608         {"nfgra",       L'ˋ'},
609         {"nfhacek",     L'ˇ'},
610         {"nfmac",       L'¯'},
611         {"nftilde",     L'˜'},
612         {"nfuml",       L'¨'},
613         {"ng",          L'ŋ'},
614         {"not",         L'¬'},
615         {"notelem",     L'∉'},
616         {"ntilde",      L'ñ'},
617         {"nu",          L'ν'},
618         {"oab",         L'〈'},
619         {"oacu",        L'ó'},
620         {"oasper",      MOAS},
621         {"ob",          L'{'},
622         {"obar",        L'ø'},
623         {"obigb",       L'{'},          /* should be big */
624         {"obigpren",    L'('},
625         {"obigsb",      L'['},          /* should be big */
626         {"obreve",      L'ŏ'},
627         {"ocirc",       L'ô'},
628         {"odsb",        L'〚'},                /* [[ U+301A */
629         {"oe",          L'œ'},
630         {"oeamp",       L'&'},
631         {"ograve",      L'ò'},
632         {"ohook",       L'o'},          /* +hook */
633         {"olenis",      MOLN},
634         {"omac",        L'ō'},
635         {"omega",       L'ω'},
636         {"omicron",     L'ο'},
637         {"ope",         L'ɛ'},
638         {"opp",         L'☍'},
639         {"oq",          L'`'},
640         {"oqq",         L'“'},
641         {"or",          MOR},
642         {"osb",         L'['},
643         {"otilde",      L'õ'},
644         {"ouml",        L'ö'},
645         {"ounce",       L'℥'},                /* ounce U+2125 */
646         {"ovparen",     L'⌢'},                /* should be sideways ( */
647         {"p",           L'′'},
648         {"pa",          L'∂'},
649         {"page",        L'P'},
650         {"pall",        L'ʎ'},
651         {"paln",        L'ɲ'},
652         {"par",         PAR},
653         {"para",        L'¶'},
654         {"pbar",        L'p'},          /* +bar */
655         {"per",         L'℘'},                /* per U+2118 */
656         {"phi",         L'φ'},
657         {"phi2",        L'ϕ'},
658         {"pi",          L'π'},
659         {"pisces",      L'♓'},
660         {"planck",      L'ħ'},
661         {"plantinJ",    L'J'},          /* should be script */
662         {"pm",          L'±'},
663         {"pmil",        L'‰'},
664         {"pp",          L'″'},
665         {"ppp",         L'‴'},
666         {"prop",        L'∝'},
667         {"psi",         L'ψ'},
668         {"pstlg",       L'£'},
669         {"q",           L'?'},          /* should be raised */
670         {"qamets",      L'ֳ'},         /* U+05B3 */
671         {"quaver",      L'♪'},
672         {"rar",         L'→'},
673         {"rasper",      MRAS},
674         {"rdot",        L'·'},
675         {"recipe",      L'℞'},                /* U+211E */
676         {"reg",         L'®'},
677         {"revC",        L'Ɔ'},         /* open O U+0186 */
678         {"reva",        L'ɒ'},
679         {"revc",        L'ɔ'},
680         {"revope",      L'ɜ'},
681         {"revr",        L'ɹ'},
682         {"revsc",       L'˒'},         /* upside-down semicolon */
683         {"revv",        L'ʌ'},
684         {"rfa",         L'o'},          /* +hook (Cf "goal") */
685         {"rhacek",      L'ř'},
686         {"rhalfbr",     L'⌉'},
687         {"rho",         L'ρ'},
688         {"rhshoe",      L'⊂'},
689         {"rlenis",      MRLN},
690         {"rsylab",      L'r'},          /* +sylab */
691         {"runash",      L'F'},          /* should be runic 'ash' */
692         {"rvow",        L'˔'},
693         {"sacu",        L'ś'},
694         {"sagit",       L'♐'},
695         {"sampi",       L'ϡ'},
696         {"saturn",      L'♄'},
697         {"sced",        L'ş'},
698         {"schwa",       L'ə'},
699         {"scorpio",     L'♏'},
700         {"scrA",        L'A'},          /* should be script */
701         {"scrC",        L'C'},
702         {"scrE",        L'E'},
703         {"scrF",        L'F'},
704         {"scrI",        L'I'},
705         {"scrJ",        L'J'},
706         {"scrL",        L'L'},
707         {"scrO",        L'O'},
708         {"scrP",        L'P'},
709         {"scrQ",        L'Q'},
710         {"scrS",        L'S'},
711         {"scrT",        L'T'},
712         {"scrb",        L'b'},
713         {"scrd",        L'd'},
714         {"scrh",        L'h'},
715         {"scrl",        L'l'},
716         {"scruple",     L'℈'},                /* U+2108 */
717         {"sdd",         L'ː'},
718         {"sect",        L'§'},
719         {"semE",        L'∃'},
720         {"sh",          L'ʃ'},
721         {"shacek",      L'š'},
722         {"sharp",       L'♯'},
723         {"sheva",       L'ְ'},         /* U+05B0 */
724         {"shti",        L'ɪ'},
725         {"shtsyll",     L'∪'},
726         {"shtu",        L'ʊ'},
727         {"sidetri",     L'⊲'},
728         {"sigma",       L'σ'},
729         {"since",       L'∵'},
730         {"slge",        L'≥'},                /* should have slanted line under */
731         {"slle",        L'≤'},                /* should have slanted line under */
732         {"sm",          L'ˈ'},
733         {"smm",         L'ˌ'},
734         {"spade",       L'♠'},
735         {"sqrt",        L'√'},
736         {"square",      L'□'},                /* U+25A1 */
737         {"ssChi",       L'Χ'},         /* should be sans serif */
738         {"ssIota",      L'Ι'},
739         {"ssOmicron",   L'Ο'},
740         {"ssPi",        L'Π'},
741         {"ssRho",       L'Ρ'},
742         {"ssSigma",     L'Σ'},
743         {"ssTau",       L'Τ'},
744         {"star",        L'*'},
745         {"stlig",       MLST},
746         {"sup2",        L'²'},
747         {"supgt",       L'˃'},
748         {"suplt",       L'˂'},
749         {"sur",         L'ʳ'},
750         {"swing",       L'∼'},
751         {"tau",         L'τ'},
752         {"taur",        L'♉'},
753         {"th",          L'þ'},
754         {"thbar",       L'þ'},         /* +bar */
755         {"theta",       L'θ'},
756         {"thinqm",      L'?'},          /* should be thinner */
757         {"tilde",       LTIL},
758         {"times",       L'×'},
759         {"tri",         L'∆'},
760         {"trli",        L'‖'},
761         {"ts",          L' '},
762         {"uacu",        L'ú'},
763         {"uasper",      MUAS},
764         {"ubar",        L'u'},          /* +bar */
765         {"ubreve",      L'ŭ'},
766         {"ucirc",       L'û'},
767         {"udA",         L'∀'},
768         {"udT",         L'⊥'},
769         {"uda",         L'ɐ'},
770         {"udh",         L'ɥ'},
771         {"udqm",        L'¿'},
772         {"udpsi",       L'⋔'},
773         {"udtr",        L'∇'},
774         {"ugrave",      L'ù'},
775         {"ulenis",      MULN},
776         {"umac",        L'ū'},
777         {"uml",         LUML},
778         {"undl",        L'ˍ'},         /* underline accent */
779         {"union",       L'∪'},
780         {"upsilon",     L'υ'},
781         {"uuml",        L'ü'},
782         {"vavpath",     L'ו'},         /* vav U+05D5 (+patah) */
783         {"vavsheva",    L'ו'},         /* vav U+05D5 (+sheva) */
784         {"vb",          L'|'},
785         {"vddd",        L'⋮'},
786         {"versicle2",   L'℣'},                /* U+2123 */
787         {"vinc",        L'¯'},
788         {"virgo",       L'♍'},
789         {"vpal",        L'ɟ'},
790         {"vvf",         L'ɣ'},
791         {"wasper",      MWAS},
792         {"wavyeq",      L'≈'},
793         {"wlenis",      MWLN},
794         {"wyn",         L'ƿ'},         /* wynn U+01BF */
795         {"xi",          L'ξ'},
796         {"yacu",        L'ý'},
797         {"ycirc",       L'ŷ'},
798         {"ygh",         L'ʒ'},
799         {"ymac",        L'y'},          /* +macron */
800         {"yuml",        L'ÿ'},
801         {"zced",        L'z'},          /* +cedilla */
802         {"zeta",        L'ζ'},
803         {"zh",          L'ʒ'},
804         {"zhacek",      L'ž'},
805 };
806 /*
807    The following special characters don't have close enough
808    equivalents in Unicode, so aren't in the above table.
809         22n             2^(2^n) Cf Fermat
810         2on4            2/4
811         Bantuo          Bantu O. Cf Otshi-herero
812         Car             C with circular arrow on top
813         albrtime        cut-time: C with vertical line
814         ardal           Cf dental
815         bantuo          Bantu o. Cf Otshi-herero
816         bbc1            single chem bond below
817         bbc2            double chem bond below
818         bbl1            chem bond like /
819         bbl2            chem bond like //
820         bbr1            chem bond like \
821         bbr2            chem bond like \\
822         bcop1           copper symbol. Cf copper
823         bcop2           copper symbol. Cf copper
824         benchm          Cf benchmark
825         btc1            single chem bond above
826         btc2            double chem bond above
827         btl1            chem bond like \
828         btl2            chem bond like \\
829         btr1            chem bond like /
830         btr2            chem bond like //
831         burman          Cf Burman
832         devph           sanskrit letter. Cf ph
833         devrfls         sanskrit letter. Cf cerebral
834         duplong[12]     musical note
835         egchi           early form of chi
836         eggamma[12]     early form of gamma
837         egiota          early form of iota
838         egkappa         early form of kappa
839         eglambda        early form of lambda
840         egmu[12]        early form of mu
841         egnu[12]        early form of nu
842         egpi[123]       early form of pi
843         egrho[12]       early form of rho
844         egsampi         early form of sampi
845         egsan           early form of san
846         egsigma[12]     early form of sigma
847         egxi[123]       early form of xi
848         elatS           early form of S
849         elatc[12]       early form of C
850         elatg[12]       early form of G
851         glagjeri        Slavonic Glagolitic jeri
852         glagjeru        Slavonic Glagolitic jeru
853         hypolem         hypolemisk (line with underdot)
854         lhrbr           lower half }
855         longmord        long mordent
856         mbwvow          backwards scretched C. Cf retract.
857         mord            music symbol.  Cf mordent
858         mostra          Cf direct
859         ohgcirc         old form of circumflex
860         oldbeta         old form of β. Cf perturbate
861         oldsemibr[12]   old forms of semibreve. Cf prolation
862         ormg            old form of g. Cf G
863         para[12345]     form of ¶
864         pauseo          musical pause sign
865         pauseu          musical pause sign
866         pharyng         Cf pharyngal
867         ragr            Black letter ragged r
868         repetn          musical repeat. Cf retort
869         segno           musical segno sign
870         semain[12]      semitic ain
871         semhe           semitic he
872         semheth         semitic heth
873         semkaph         semitic kaph
874         semlamed[12]    semitic lamed
875         semmem          semitic mem
876         semnum          semitic nun
877         sempe           semitic pe
878         semqoph[123]    semitic qoph
879         semresh         semitic resh
880         semtav[1234]    semitic tav
881         semyod          semitic yod
882         semzayin[123]   semitic zayin
883         shtlong[12]     U with underbar. Cf glyconic
884         sigmatau        σ,τ combination
885         squaver         sixteenth note
886         sqbreve         square musical breve note
887         swast           swastika
888         uhrbr           upper half of big }
889         versicle1               Cf versicle
890  */
891
892
/*
 * Translation table for ordinary text; oedprintentry starts with it.
 * Indexed by the input byte masked to 7 bits.  Printable ASCII maps
 * to itself, except that '&' yields SPCS (start of a special
 * character name), '<' yields TAGS and '>' yields TAGE.
 * Control characters and DEL yield NONE.
 */
static Rune normtab[128] = {
        /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
/*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
        L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
/*30*/  L'0',   L'1',   L'2',   L'3',   L'4',   L'5',   L'6',   L'7',
        L'8',   L'9',   L':',   L';',   TAGS,   L'=',   TAGE,   L'?',
/*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
        L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
/*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
        L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
/*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
        L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
/*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
        L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
};
/*
 * Translation table selected by oedprintentry for the Ph tag.
 * Same layout as normtab, but several ASCII codes stand for phonetic
 * symbols (e.g. '"' and '%' give the stress marks, '@' gives schwa,
 * ':' gives the length mark).  Note that '&' maps to 'æ' here rather
 * than SPCS, so this table never starts a special character name.
 */
static Rune phtab[128] = {
        /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
/*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*20*/  L' ',   L'!',   L'ˈ',  L'#',   L'$',   L'ˌ',  L'æ',  L'\'',
        L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
/*30*/  L'0',   L'1',   L'2',   L'ɜ',  L'4',   L'5',   L'6',   L'7',
        L'8',   L'ø',  L'ː',  L';',   TAGS,   L'=',   TAGE,   L'?',
/*40*/  L'ə',  L'ɑ',  L'B',   L'C',   L'ð',  L'ɛ',  L'F',   L'G',
        L'H',   L'ɪ',  L'J',   L'K',   L'L',   L'M',   L'ŋ',  L'ɔ',
/*50*/  L'P',   L'ɒ',  L'R',   L'ʃ',  L'θ',  L'ʊ',  L'ʌ',  L'W',
        L'X',   L'Y',   L'ʒ',  L'[',   L'\\',  L']',   L'^',   L'_',
/*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
        L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
/*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
        L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
};
/*
 * Translation table selected by oedprintentry for the Gk tag:
 * Latin letters stand for Greek ones (only 'V' and 'v' map to
 * themselves); everything else is as in normtab.  oedprintentry also
 * indexes it directly to print the number of an Sgk sense.
 */
static Rune grtab[128] = {
        /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
/*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
        L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
/*30*/  L'0',   L'1',   L'2',   L'3',   L'4',   L'5',   L'6',   L'7',
        L'8',   L'9',   L':',   L';',   TAGS,   L'=',   TAGE,   L'?',
/*40*/  L'@',   L'Α',  L'Β',  L'Ξ',  L'Δ',  L'Ε',  L'Φ',  L'Γ',
        L'Η',  L'Ι',  L'Ϛ',  L'Κ',  L'Λ',  L'Μ',  L'Ν',  L'Ο',
/*50*/  L'Π',  L'Θ',  L'Ρ',  L'Σ',  L'Τ',  L'Υ',  L'V',   L'Ω',
        L'Χ',  L'Ψ',  L'Ζ',  L'[',   L'\\',  L']',   L'^',   L'_',
/*60*/  L'`',   L'α',  L'β',  L'ξ',  L'δ',  L'ε',  L'φ',  L'γ',
        L'η',  L'ι',  L'ς',  L'κ',  L'λ',  L'μ',  L'ν',  L'ο',
/*70*/  L'π',  L'θ',  L'ρ',  L'σ',  L'τ',  L'υ',  L'v',   L'ω',
        L'χ',  L'ψ',  L'ζ',  L'{',   L'|',   L'}',   L'~',   NONE,
};
/*
 * Translation table selected by oedprintentry for the In tag:
 * digits, parentheses, '+', '-' and '=' map to their Unicode
 * subscript forms; everything else is as in normtab.
 */
static Rune subtab[128] = {
        /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
/*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
        L'₍', L'₎', L'*',   L'₊', L',',   L'₋', L'.',   L'/',
/*30*/  L'₀', L'₁', L'₂', L'₃', L'₄', L'₅', L'₆', L'₇',
        L'₈', L'₉', L':',   L';',   TAGS,   L'₌', TAGE,   L'?',
/*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
        L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
/*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
        L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
/*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
        L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
/*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
        L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
};
/*
 * Translation table selected by oedprintentry for the Hm, Su and Fq
 * tags: digits, parentheses, '+', '-' and '=' map to their Unicode
 * superscript forms; everything else is as in normtab.
 */
static Rune suptab[128] = {
        /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
/*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
        L'⁽', L'⁾', L'*',   L'⁺', L',',   L'⁻', L'.',   L'/',
/*30*/  L'⁰', L'¹',  L'²',  L'³',  L'⁴', L'⁵', L'⁶', L'⁷',
        L'⁸', L'⁹', L':',   L';',   TAGS,   L'⁼', TAGE,   L'?',
/*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
        L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
/*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
        L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
/*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
        L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
/*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
        L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
};
988
/*
 * Parser state shared by the functions below.
 * gettag fills tagstarts, tag, naux, auxname and auxval;
 * getspec fills spec; oedprintentry sets auxstate and curentry.
 */
static int      tagstarts;              /* 1 for an opening tag, 0 for a closing one */
static char     tag[Buflen];            /* name of the tag just parsed */
static int      naux;                   /* number of aux pairs of that tag */
static char     auxname[Maxaux][Buflen];
static char     auxval[Maxaux][Buflen];
static char     spec[Buflen];           /* special character name just parsed */
static char     *auxstate[Naux];        /* vals for most recent tag */
static Entry    curentry;               /* entry being printed, for diagnostics */
/* byte length of curentry */
#define cursize (curentry.end-curentry.start)

static char     *getspec(char *, char *);
static char     *gettag(char *, char *);
static void     dostatus(void);
1002
1003 /*
1004  * cmd is one of:
1005  *    'p': normal print
1006  *    'h': just print headwords
1007  *    'P': print raw
1008  */
1009 void
1010 oedprintentry(Entry e, int cmd)
1011 {
1012         char *p, *pe;
1013         int t, a, i;
1014         long r, rprev, rlig;
1015         Rune *transtab;
1016
1017         p = e.start;
1018         pe = e.end;
1019         transtab = normtab;
1020         rprev = NONE;
1021         changett(0, 0, 0);
1022         curentry = e;
1023         if(cmd == 'h')
1024                 outinhibit = 1;
1025         while(p < pe) {
1026                 if(cmd == 'r') {
1027                         outchar(*p++);
1028                         continue;
1029                 }
1030                 r = transtab[(*p++)&0x7F];
1031                 if(r < NONE) {
1032                         /* Emit the rune, but buffer in case of ligature */
1033                         if(rprev != NONE)
1034                                 outrune(rprev);
1035                         rprev = r;
1036                 } else if(r == SPCS) {
1037                         /* Start of special character name */
1038                         p = getspec(p, pe);
1039                         r = lookassoc(spectab, asize(spectab), spec);
1040                         if(r == -1) {
1041                                 if(debug)
1042                                         err("spec %ld %d %s",
1043                                                 e.doff, cursize, spec);
1044                                 r = L'�';
1045                         }
1046                         if(r >= LIGS && r < LIGE) {
1047                                 /* handle possible ligature */
1048                                 rlig = liglookup(r, rprev);
1049                                 if(rlig != NONE)
1050                                         rprev = rlig;   /* overwrite rprev */
1051                                 else {
1052                                         /* could print accent, but let's not */
1053                                         if(rprev != NONE) outrune(rprev);
1054                                         rprev = NONE;
1055                                 }
1056                         } else if(r >= MULTI && r < MULTIE) {
1057                                 if(rprev != NONE) {
1058                                         outrune(rprev);
1059                                         rprev = NONE;
1060                                 }
1061                                 outrunes(multitab[r-MULTI]);
1062                         } else if(r == PAR) {
1063                                 if(rprev != NONE) {
1064                                         outrune(rprev);
1065                                         rprev = NONE;
1066                                 }
1067                                 outnl(1);
1068                         } else {
1069                                 if(rprev != NONE) outrune(rprev);
1070                                 rprev = r;
1071                         }
1072                 } else if(r == TAGS) {
1073                         /* Start of tag name */
1074                         if(rprev != NONE) {
1075                                 outrune(rprev);
1076                                 rprev = NONE;
1077                         }
1078                         p = gettag(p, pe);
1079                         t = lookassoc(tagtab, asize(tagtab), tag);
1080                         if(t == -1) {
1081                                 if(debug)
1082                                         err("tag %ld %d %s",
1083                                                 e.doff, cursize, tag);
1084                                 continue;
1085                         }
1086                         for(i = 0; i < Naux; i++)
1087                                 auxstate[i] = 0;
1088                         for(i = 0; i < naux; i++) {
1089                                 a = lookassoc(auxtab, asize(auxtab), auxname[i]);
1090                                 if(a == -1) {
1091                                         if(debug)
1092                                                 err("aux %ld %d %s",
1093                                                         e.doff, cursize, auxname[i]);
1094                                 } else
1095                                         auxstate[a] = auxval[i];
1096                         }
1097                         switch(t){
1098                         case E:
1099                         case Ve:
1100                                 outnl(0);
1101                                 if(tagstarts)
1102                                         dostatus();
1103                                 break;
1104                         case Ed:
1105                         case Etym:
1106                                 outchar(tagstarts? '[' : ']');
1107                                 break;
1108                         case Pr:
1109                                 outchar(tagstarts? '(' : ')');
1110                                 break;
1111                         case In:
1112                                 transtab = changett(transtab, subtab, tagstarts);
1113                                 break;
1114                         case Hm:
1115                         case Su:
1116                         case Fq:
1117                                 transtab = changett(transtab, suptab, tagstarts);
1118                                 break;
1119                         case Gk:
1120                                 transtab = changett(transtab, grtab, tagstarts);
1121                                 break;
1122                         case Ph:
1123                                 transtab = changett(transtab, phtab, tagstarts);
1124                                 break;
1125                         case Hw:
1126                                 if(cmd == 'h') {
1127                                         if(!tagstarts)
1128                                                 outchar(' ');
1129                                         outinhibit = !tagstarts;
1130                                 }
1131                                 break;
1132                         case S0:
1133                         case S1:
1134                         case S2:
1135                         case S3:
1136                         case S4:
1137                         case S5:
1138                         case S6:
1139                         case S7a:
1140                         case S7n:
1141                         case Sn:
1142                         case Sgk:
1143                                 if(tagstarts) {
1144                                         outnl(2);
1145                                         dostatus();
1146                                         if(auxstate[Num]) {
1147                                                 if(t == S3 || t == S5) {
1148                                                         i = atoi(auxstate[Num]);
1149                                                         while(i--)
1150                                                                 outchar('*');
1151                                                         outchars("  ");
1152                                                 } else if(t == S7a || t == S7n || t == Sn) {
1153                                                         outchar('(');
1154                                                         outchars(auxstate[Num]);
1155                                                         outchars(") ");
1156                                                 } else if(t == Sgk) {
1157                                                         i = grtab[auxstate[Num][0]];
1158                                                         if(i != NONE)
1159                                                                 outrune(i);
1160                                                         outchars(".  ");
1161                                                 } else {
1162                                                         outchars(auxstate[Num]);
1163                                                         outchars(".  ");
1164                                                 }
1165                                         }
1166                                 }
1167                                 break;
1168                         case Cb:
1169                         case Db:
1170                         case Qp:
1171                         case P:
1172                                 if(tagstarts)
1173                                         outnl(1);
1174                                 break;
1175                         case Table:
1176                                 /*
1177                                  * Todo: gather columns, justify them, etc.
1178                                  * For now, just let colums come out as rows
1179                                  */
1180                                 if(!tagstarts)
1181                                         outnl(0);
1182                                 break;
1183                         case Col:
1184                                 if(tagstarts)
1185                                         outnl(0);
1186                                 break;
1187                         case Dn:
1188                                 if(tagstarts)
1189                                         outchar('/');
1190                                 break;
1191                         }
1192                 }
1193         }
1194         if(cmd == 'h') {
1195                 outinhibit = 0;
1196                 outnl(0);
1197         }
1198 }
1199
1200 /*
1201  * Return offset into bdict where next oed entry after fromoff starts.
1202  * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
1203  */
1204 long
1205 oednextoff(long fromoff)
1206 {
1207         long a, n;
1208         int c;
1209
1210         a = Bseek(bdict, fromoff, 0);
1211         if(a < 0)
1212                 return -1;
1213         n = 0;
1214         for(;;) {
1215                 c = Bgetc(bdict);
1216                 if(c < 0)
1217                         break;
1218                 if(c == '<') {
1219                         c = Bgetc(bdict);
1220                         if(c == 'e') {
1221                                 c = Bgetc(bdict);
1222                                 if(c == '>' || c == ' ')
1223                                         n = 3;
1224                         } else if(c == 'v' && Bgetc(bdict) == 'e') {
1225                                 c = Bgetc(bdict);
1226                                 if(c == '>' || c == ' ')
1227                                         n = 4;
1228                         }
1229                         if(n)
1230                                 break;
1231                 }
1232         }
1233         return (Boffset(bdict)-n);
1234 }
1235
/* Text of the pronunciation key, printed verbatim by oedprintkey. */
static char *prkey =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h  ...  ho! (həʊ)\n"
"r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)...  her (hɜː(r))\n"
"s  ...  see (siː), success (səkˈsɜs)\n"
"w  ...  wear (wɛə(r))\n"
"hw ...  when (hwɛn)\n"
"j  ...  yes (jɛs)\n"
"θ  ...  thin (θin), bath (bɑːθ)\n"
"ð  ...  then (ðɛn), bathe (beɪð)\n"
"ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
"dʒ ...  judge (dʒʌdʒ)\n"
"ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ...  finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ  ...  Fr. cognac (kɔɲak)\n"
"x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
"ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
"c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ  ...  Fr. cuisine (kɥizin)\n"
"\n"
"II. VOWELS AND DIPHTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
"æ  ...  pat (pæt)\n"
"ʌ  ...  putt (pʌt)\n"
"ɒ  ...  pot (pɒt)\n"
"ʊ  ...  put (pʊt)\n"
"ə  ...  another (əˈnʌðə(r))\n"
"(ə)...  beaten (ˈbiːt(ə)n)\n"
"i  ...  Fr. si (si)\n"
"e  ...  Fr. bébé (bebe)\n"
"a  ...  Fr. mari (mari)\n"
"ɑ  ...  Fr. bâtiment (bɑtimã)\n"
"ɔ  ...  Fr. homme (ɔm)\n"
"o  ...  Fr. eau (o)\n"
"ø  ...  Fr. peu (pø)\n"
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã  ...  Fr. franc (frã)\n"
"ɔ˜ ...  Fr. bon (bɔ˜n)\n"
"œ˜ ...  Fr. un (œ˜)\n"
"\n"
"Diphthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ...  buy (baɪ)\n"
"ɔɪ ...  boy (bɔɪ)\n"
"əʊ ...  no (nəʊ)\n"
"aʊ ...  now (naʊ)\n"
"ɪə ...  peer (pɪə(r))\n"
"ɛə ...  pair (pɛə(r))\n"
"ʊə ...  tour (tʊə(r))\n"
"ɔə ...  boar (bɔə(r))\n"
"\n"
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
1328
/*
 * Print the pronunciation key (the prkey text) on bout.
 */
void
oedprintkey(void)
{
        Bprint(bout, "%s", prkey);
}
1334
1335 /*
1336  * f points just after a '&', fe points at end of entry.
1337  * Accumulate the special name, starting after the &
1338  * and continuing until the next '.', in spec[].
1339  * Return pointer to char after '.'.
1340  */
1341 static char *
1342 getspec(char *f, char *fe)
1343 {
1344         char *t;
1345         int c, i;
1346
1347         t = spec;
1348         i = sizeof spec;
1349         while(--i > 0) {
1350                 c = *f++;
1351                 if(c == '.' || f == fe)
1352                         break;
1353                 *t++ = c;
1354         }
1355         *t = 0;
1356         return f;
1357 }
1358
1359 /*
1360  * f points just after '<'; fe points at end of entry.
1361  * Expect next characters from bin to match:
1362  *  [/][^ >]+( [^>=]+=[^ >]+)*>
1363  *      tag   auxname auxval
1364  * Accumulate the tag and its auxilliary information in
1365  * tag[], auxname[][] and auxval[][].
1366  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
1367  * Set naux to the number of aux pairs found.
1368  * Return pointer to after final '>'.
1369  */
1370 static char *
1371 gettag(char *f, char *fe)
1372 {
1373         char *t;
1374         int c, i;
1375
1376         t = tag;
1377         c = *f++;
1378         if(c == '/')
1379                 tagstarts = 0;
1380         else {
1381                 tagstarts = 1;
1382                 *t++ = c;
1383         }
1384         i = Buflen;
1385         naux = 0;
1386         while(--i > 0) {
1387                 c = *f++;
1388                 if(c == '>' || f == fe)
1389                         break;
1390                 if(c == ' ') {
1391                         *t = 0;
1392                         t = auxname[naux];
1393                         i = Buflen;
1394                         if(naux < Maxaux-1)
1395                                 naux++;
1396                 } else if(naux && c == '=') {
1397                         *t = 0;
1398                         t = auxval[naux-1];
1399                         i = Buflen;
1400                 } else
1401                         *t++ = c;
1402         }
1403         *t = 0;
1404         return f;
1405 }
1406
1407 static void
1408 dostatus(void)
1409 {
1410         char *s;
1411
1412         s = auxstate[St];
1413         if(s) {
1414                 if(strcmp(s, "obs") == 0)
1415                         outrune(L'†');
1416                 else if(strcmp(s, "ali") == 0)
1417                         outrune(L'‖');
1418                 else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
1419                         outrune(L'¶');
1420                 else if(strcmp(s, "xref") == 0)
1421                         {/* nothing */}
1422                 else if(debug)
1423                         err("status %ld %d %s", curentry.doff, cursize, s);
1424         }
1425 }