]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/dict/pgw.c
realemu: implement IDIV, mark 0xE0000 writeable, fix DIV overfow trap
[plan9front.git] / sys / src / cmd / dict / pgw.c
1 /* thanks to Caerwyn Jones <caerwyn@comcast.net> for this module */
2 #include <u.h>
3 #include <libc.h>
4 #include <bio.h>
5 #include "dict.h"
6
7 enum {        /* size limits; the code that applies them is not visible in this excerpt */
8         Buflen=1000,    /* presumably a scratch-buffer length -- TODO confirm against its users */
9         Maxaux=5,       /* presumably a cap on auxiliary items per tag -- TODO confirm against its users */
10 };
11
12 /* Possible tags; tagtab pairs each tag's textual name with one of these constants */
13 enum {
14         B,              /* Bold */
15         Blockquote,     /* Block quote */
16         Br,             /* Break line */
17         Cd,             /* ? colloquial data */
18         Col,            /* ? Colloquial */
19         Def,            /* Definition */
20         Hw,             /* Head Word */
21         I,              /* Italics */
22         P,              /* Paragraph */
23         Pos,            /* Part of Speech */
24         Sn,             /* Sense */
25         U,              /* ? cross reference */
26         Wf,             /* ? word form */
27         Ntag            /* end of tags; as the last enumerator it equals the number of tags above */
28 };
29
30 /* Assoc tables must be sorted on first field */
31
32 static Assoc tagtab[] = {      /* tag name -> tag enum constant */
33         {"b",                   B},
34         {"blockquote",  Blockquote},
35         {"BR",          Br},    /* NOTE(review): in order only if keys compare case-insensitively; byte-wise, "BR" sorts before "b" -- confirm against the lookup routine */
36         {"cd",          Cd},
37         {"col",         Col},
38         {"def",         Def},
39         {"hw",          Hw},
40         {"i",                   I},
41         {"p",                   P},
42         {"pos",         Pos},
43         {"sn",          Sn},
44         {"u",                   U},
45         {"wf",          Wf},
46 };
47
48 /* Possible tag auxiliary info; auxtab pairs each attribute name with one of these constants */
49 enum {
50         Cols,           /* number of columns in a table */
51         Num,            /* letter or number, for a sense */
52         St,             /* status (e.g., obs) */
53         Naux            /* last enumerator, so it equals the number of aux kinds above */
54 };
55
56 static Assoc auxtab[] = {      /* auxiliary attribute name -> aux enum constant; keys are in sorted order */
57         {"cols",        Cols},
58         {"num",         Num},
59         {"st",          St}
60 };
61
62 static Assoc spectab[] = {     /* special-character name -> rune literal or symbolic code (MAAS, LACU, ... are defined outside this excerpt); keep sorted by key, byte-wise: digits, upper case, lower case */
63         {"3on4",        L'¾'},
64         {"AElig",               L'Æ'},
65         {"Aacute",      L'Á'},
66         {"Aang",        L'Å'},
67         {"Abarab",      L'Ā'},
68         {"Acirc",       L'Â'},
69         {"Agrave",      L'À'},
70         {"Alpha",       L'Α'},
71         {"Amacr",       L'Ā'},
72         {"Asg",         L'Ʒ'},         /* Unicycle. Cf "Sake" */
73         {"Auml",        L'Ä'},
74         {"Beta",        L'Β'},
75         {"Cced",        L'Ç'},
76         {"Chacek",      L'Č'},
77         {"Chi",         L'Χ'},
78         {"Chirho",      L'☧'},                /* Chi Rho U+2627 */
79         {"Csigma",      L'Ϛ'},
80         {"Delta",       L'Δ'},
81         {"Eacute",      L'É'},
82         {"Ecirc",       L'Ê'},
83         {"Edh",         L'Ð'},
84         {"Epsilon",     L'Ε'},
85         {"Eta",         L'Η'},
86         {"Gamma",       L'Γ'},
87         {"Iacute",      L'Í'},
88         {"Icirc",       L'Î'},
89         {"Imacr",       L'Ī'},
90         {"Integ",       L'∫'},
91         {"Iota",        L'Ι'},
92         {"Kappa",       L'Κ'},
93         {"Koppa",       L'Ϟ'},
94         {"Lambda",      L'Λ'},
95         {"Lbar",        L'Ł'},
96         {"Mu",          L'Μ'},
97         {"Naira",       L'N'},          /* should have bar through */
98         {"Nplus",       L'N'},          /* should have plus above */
99         {"Ntilde",      L'Ñ'},
100         {"Nu",          L'Ν'},
101         {"Oacute",      L'Ó'},
102         {"Obar",        L'Ø'},
103         {"Ocirc",       L'Ô'},
104         {"Oe",          L'Œ'},
105         {"Omega",       L'Ω'},
106         {"Omicron",     L'Ο'},
107         {"Ouml",        L'Ö'},
108         {"Phi",         L'Φ'},
109         {"Pi",          L'Π'},
110         {"Psi",         L'Ψ'},
111         {"Rho",         L'Ρ'},
112         {"Sacute",      L'Ś'},
113         {"Sigma",       L'Σ'},
114         {"Summ",        L'∑'},
115         {"Tau",         L'Τ'},
116         {"Th",          L'Þ'},
117         {"Theta",       L'Θ'},
118         {"Tse",         L'Ц'},
119         {"Uacute",      L'Ú'},
120         {"Ucirc",       L'Û'},
121         {"Upsilon",     L'Υ'},
122         {"Uuml",        L'Ü'},
123         {"Wyn",         L'ƿ'},         /* wynn U+01BF */
124         {"Xi",          L'Ξ'},
125         {"Ygh",         L'Ʒ'},         /* Yogh U+01B7 */
126         {"Zeta",        L'Ζ'},
127         {"Zh",          L'Ʒ'},         /* looks like Yogh. Cf "Sake" */
128         {"a",           L'a'},          /* ante */
129         {"aacute",      L'á'},
130         {"aang",        L'å'},
131         {"aasper",      MAAS},
132         {"abreve",      L'ă'},
133         {"acirc",       L'â'},
134         {"acute",               LACU},
135         {"aelig",               L'æ'},
136         {"agrave",      L'à'},
137         {"ahook",       L'ą'},
138         {"alenis",      MALN},
139         {"alpha",       L'α'},
140         {"amacr",       L'ā'},
141         {"amp",         L'&'},
142         {"and",         MAND},
143         {"ang",         LRNG},
144         {"angle",       L'∠'},
145         {"ankh",        L'☥'},                /* ankh U+2625 */
146         {"ante",        L'a'},          /* before (year) */
147         {"aonq",        MAOQ},
148         {"appreq",      L'≃'},
149         {"aquar",       L'♒'},
150         {"arDadfull",   L'ض'},         /* Dad U+0636 */
151         {"arHa",        L'ح'},         /* haa U+062D */
152         {"arTa",        L'ت'},         /* taa U+062A */
153         {"arain",       L'ع'},         /* ain U+0639 */
154         {"arainfull",   L'ع'},         /* ain U+0639 */
155         {"aralif",      L'ا'},         /* alef U+0627 */
156         {"arba",        L'ب'},         /* baa U+0628 */
157         {"arha",        L'ه'},         /* ha U+0647 */
158         {"aries",       L'♈'},
159         {"arnun",       L'ن'},         /* noon U+0646 */
160         {"arnunfull",   L'ن'},         /* noon U+0646 */
161         {"arpa",        L'ه'},         /* ha U+0647 */
162         {"arqoph",      L'ق'},         /* qaf U+0642 */
163         {"arshinfull",  L'ش'},         /* sheen U+0634 */
164         {"arta",        L'ت'},         /* taa U+062A */
165         {"artafull",    L'ت'},         /* taa U+062A */
166         {"artha",       L'ث'},         /* thaa U+062B */
167         {"arwaw",       L'و'},         /* waw U+0648 */
168         {"arya",        L'ي'},         /* ya U+064A */
169         {"aryafull",    L'ي'},         /* ya U+064A */
170         {"arzero",      L'٠'},         /* indic zero U+0660 */
171         {"asg",         L'ʒ'},         /* unicycle character. Cf "hallow" */
172         {"asper",       LASP},
173         {"assert",      L'⊢'},
174         {"astm",        L'⁂'},                /* asterism: should be upside down */
175         {"at",          L'@'},
176         {"atilde",      L'ã'},
177         {"auml",        L'ä'},
178         {"ayin",        L'ع'},         /* arabic ain U+0639 */
179         {"b1",          L'-'},          /* single bond */
180         {"b2",          L'='},          /* double bond */
181         {"b3",          L'≡'},                /* triple bond */
182         {"bbar",        L'ƀ'},         /* b with bar U+0180 */
183         {"beta",        L'β'},
184         {"bigobl",      L'/'},
185         {"blC",         L'C'},          /* should be black letter */
186         {"blJ",         L'J'},          /* should be black letter */
187         {"blU",         L'U'},          /* should be black letter */
188         {"blb",         L'b'},          /* should be black letter */
189         {"blozenge",    L'◊'},                /* U+25CA; should be black */
190         {"bly",         L'y'},          /* should be black letter */
191         {"bra",         MBRA},
192         {"brbl",        LBRB},
193         {"breve",       LBRV},
194         {"bslash",      L'\\'},
195         {"bsquare",     L'■'},                /* black square U+25A0 */
196         {"btril",       L'◀'},                /* U+25C0 */
197         {"btrir",       L'▶'},                /* U+25B6 */
198         {"c",           L'c'},          /* circa */
199         {"cab",         L'〉'},
200         {"cacute",      L'ć'},
201         {"canc",        L'♋'},
202         {"capr",        L'♑'},
203         {"caret",       L'^'},
204         {"cb",          L'}'},
205         {"cbigb",       L'}'},
206         {"cbigpren",    L')'},
207         {"cbigsb",      L']'},
208         {"cced",        L'ç'},
209         {"cdil",        LCED},
210         {"cdsb",        L'〛'},                /* ]] U+301b */
211         {"cent",        L'¢'},
212         {"chacek",      L'č'},
213         {"chi",         L'χ'},
214         {"circ",        LRNG},
215         {"circa",       L'c'},          /* about (year) */
216         {"circbl",      L'̥'},         /* ring below accent U+0325 */
217         {"circle",      L'○'},                /* U+25CB */
218         {"circledot",   L'⊙'},
219         {"click",       L'ʖ'},
220         {"club",        L'♣'},
221         {"comtime",     L'C'},
222         {"conj",        L'☌'},
223         {"cprt",        L'©'},
224         {"cq",          L'\''},
225         {"cqq",         L'”'},
226         {"cross",       L'✠'},                /* maltese cross U+2720 */
227         {"crotchet",    L'♩'},
228         {"csb",         L']'},
229         {"ctilde",      L'c'},          /* +tilde */
230         {"ctlig",       MLCT},
231         {"cyra",        L'а'},
232         {"cyre",        L'е'},
233         {"cyrhard",     L'ъ'},
234         {"cyrjat",      L'ѣ'},
235         {"cyrm",        L'м'},
236         {"cyrn",        L'н'},
237         {"cyrr",        L'р'},
238         {"cyrsoft",     L'ь'},
239         {"cyrt",        L'т'},
240         {"cyry",        L'ы'},
241         {"dag",         L'†'},
242         {"dbar",        L'đ'},
243         {"dblar",       L'⇋'},
244         {"dblgt",       L'≫'},
245         {"dbllt",       L'≪'},
246         {"dced",        L'd'},          /* +cedilla */
247         {"dd",          MDD},
248         {"ddag",        L'‡'},
249         {"ddd",         MDDD},
250         {"decr",        L'↓'},
251         {"deg",         L'°'},
252         {"dele",        L'd'},          /* should be dele */
253         {"delta",       L'δ'},
254         {"descnode",    L'☋'},                /* descending node U+260B */
255         {"diamond",     L'♢'},
256         {"digamma",     L'ϝ'},
257         {"div",         L'÷'},
258         {"dlessi",      L'ı'},
259         {"dlessj1",     L'j'},          /* should be dotless */
260         {"dlessj2",     L'j'},          /* should be dotless */
261         {"dlessj3",     L'j'},          /* should be dotless */
262         {"dollar",      L'$'},
263         {"dotab",       LDOT},
264         {"dotbl",       LDTB},
265         {"drachm",      L'ʒ'},
266         {"dubh",        L'-'},
267         {"eacute",      L'é'},
268         {"earth",       L'♁'},
269         {"easper",      MEAS},
270         {"ebreve",      L'ĕ'},
271         {"ecirc",       L'ê'},
272         {"edh",         L'ð'},
273         {"egrave",      L'è'},
274         {"ehacek",      L'ě'},
275         {"ehook",       L'ę'},
276         {"elem",        L'∊'},
277         {"elenis",      MELN},
278         {"em",          L'—'},
279         {"emacr",       L'ē'},
280         {"emem",        MEMM},
281         {"en",          L'–'},
282         {"epsilon",     L'ε'},
283         {"equil",       L'⇋'},
284         {"ergo",        L'∴'},
285         {"es",          MES},
286         {"eszett",      L'ß'},
287         {"eta",         L'η'},
288         {"eth",         L'ð'},
289         {"euml",        L'ë'},
290         {"expon",       L'↑'},
291         {"fact",        L'!'},
292         {"fata",        L'ɑ'},
293         {"fatpara",     L'¶'},         /* should have fatter, filled in bowl */
294         {"female",      L'♀'},
295         {"ffilig",      MLFFI},
296         {"fflig",       MLFF},
297         {"ffllig",      MLFFL},
298         {"filig",       MLFI},
299         {"flat",        L'♭'},
300         {"fllig",       MLFL},
301         {"frE",         L'E'},          /* should be curly */
302         {"frL",         L'L'},          /* should be curly */
303         {"frR",         L'R'},          /* should be curly */
304         {"frakB",       L'B'},          /* should have fraktur style */
305         {"frakG",       L'G'},
306         {"frakH",       L'H'},
307         {"frakI",       L'I'},
308         {"frakM",       L'M'},
309         {"frakU",       L'U'},
310         {"frakX",       L'X'},
311         {"frakY",       L'Y'},
312         {"frakh",       L'h'},
313         {"frbl",        LFRB},
314         {"frown",       LFRN},
315         {"fs",          L' '},
316         {"fsigma",      L'ς'},
317         {"gAacute",     L'Á'},         /* should be Α+acute */
318         {"gaacute",     L'α'},         /* +acute */
319         {"gabreve",     L'α'},         /* +breve */
320         {"gafrown",     L'α'},         /* +frown */
321         {"gagrave",     L'α'},         /* +grave */
322         {"gamacr",      L'α'},         /* +macron */
323         {"gamma",       L'γ'},
324         {"gauml",       L'α'},         /* +umlaut */
325         {"ge",          L'≧'},
326         {"geacute",     L'ε'},         /* +acute */
327         {"gegrave",     L'ε'},         /* +grave */
328         {"ghacute",     L'η'},         /* +acute */
329         {"ghfrown",     L'η'},         /* +frown */
330         {"ghgrave",     L'η'},         /* +grave */
331         {"ghmacr",      L'η'},         /* +macron */
332         {"giacute",     L'ι'},         /* +acute */
333         {"gibreve",     L'ι'},         /* +breve */
334         {"gifrown",     L'ι'},         /* +frown */
335         {"gigrave",     L'ι'},         /* +grave */
336         {"gimacr",      L'ι'},         /* +macron */
337         {"giuml",       L'ι'},         /* +umlaut */
338         {"glagjat",     L'ѧ'},
339         {"glots",       L'ˀ'},
340         {"goacute",     L'ο'},         /* +acute */
341         {"gobreve",     L'ο'},         /* +breve */
342         {"grave",       LGRV},
343         {"gt",          L'>'},
344         {"guacute",     L'υ'},         /* +acute */
345         {"gufrown",     L'υ'},         /* +frown */
346         {"gugrave",     L'υ'},         /* +grave */
347         {"gumacr",      L'υ'},         /* +macron */
348         {"guuml",       L'υ'},         /* +umlaut */
349         {"gwacute",     L'ω'},         /* +acute */
350         {"gwfrown",     L'ω'},         /* +frown */
351         {"gwgrave",     L'ω'},         /* +grave */
352         {"hacek",       LHCK},
353         {"halft",       L'⌈'},
354         {"hash",        L'#'},
355         {"hasper",      MHAS},
356         {"hatpath",     L'ֲ'},         /* hataf patah U+05B2 */
357         {"hatqam",      L'ֳ'},         /* hataf qamats U+05B3 */
358         {"hatseg",      L'ֱ'},         /* hataf segol U+05B1 */
359         {"hbar",        L'ħ'},
360         {"heart",       L'♡'},
361         {"hebaleph",    L'א'},         /* aleph U+05D0 */
362         {"hebayin",     L'ע'},         /* ayin U+05E2 */
363         {"hebbet",      L'ב'},         /* bet U+05D1 */
364         {"hebbeth",     L'ב'},         /* bet U+05D1 */
365         {"hebcheth",    L'ח'},         /* het U+05D7 */
366         {"hebdaleth",   L'ד'},         /* dalet U+05D3 */
367         {"hebgimel",    L'ג'},         /* gimel U+05D2 */
368         {"hebhe",       L'ה'},         /* he U+05D4 */
369         {"hebkaph",     L'כ'},         /* kaf U+05DB */
370         {"heblamed",    L'ל'},         /* lamed U+05DC */
371         {"hebmem",      L'מ'},         /* mem U+05DE */
372         {"hebnun",      L'נ'},         /* nun U+05E0 */
373         {"hebnunfin",   L'ן'},         /* final nun U+05DF */
374         {"hebpe",       L'פ'},         /* pe U+05E4 */
375         {"hebpedag",    L'ף'},         /* final pe? U+05E3 */
376         {"hebqoph",     L'ק'},         /* qof U+05E7 */
377         {"hebresh",     L'ר'},         /* resh U+05E8 */
378         {"hebshin",     L'ש'},         /* shin U+05E9 */
379         {"hebtav",      L'ת'},         /* tav U+05EA */
380         {"hebtsade",    L'צ'},         /* tsadi U+05E6 */
381         {"hebwaw",      L'ו'},         /* vav? U+05D5 */
382         {"hebyod",      L'י'},         /* yod U+05D9 */
383         {"hebzayin",    L'ז'},         /* zayin U+05D6 */
384         {"hgz",         L'ʒ'},         /* ??? Cf "alet" */
385         {"hireq",       L'ִ'},         /* U+05B4 */
386         {"hlenis",      MHLN},
387         {"hook",        LOGO},
388         {"horizE",      L'E'},          /* should be on side */
389         {"horizP",      L'P'},          /* should be on side */
390         {"horizS",      L'∽'},
391         {"horizT",      L'⊣'},
392         {"horizb",      L'{'},          /* should be underbrace */
393         {"ia",          L'α'},
394         {"iacute",      L'í'},
395         {"iasper",      MIAS},
396         {"ib",          L'β'},
397         {"ibar",        L'ɨ'},
398         {"ibreve",      L'ĭ'},
399         {"icirc",       L'î'},
400         {"id",          L'δ'},
401         {"ident",       L'≡'},
402         {"ie",          L'ε'},
403         {"ifflig",      MLFF},
404         {"ifilig",      MLFI},
405         {"ig",          L'γ'},
406         {"igrave",      L'ì'},
407         {"ih",          L'η'},
408         {"ii",          L'ι'},
409         {"ik",          L'κ'},
410         {"ilenis",      MILN},
411         {"imacr",       L'ī'},
412         {"implies",     L'⇒'},
413         {"index",       L'☞'},
414         {"infin",       L'∞'},
415         {"integ",       L'∫'},
416         {"intsec",      L'∩'},
417         {"invpri",      L'ˏ'},
418         {"iota",        L'ι'},
419         {"iq",          L'ψ'},
420         {"istlig",      MLST},
421         {"isub",        L'ϵ'},         /* iota below accent */
422         {"iuml",        L'ï'},
423         {"iz",          L'ζ'},
424         {"jup",         L'♃'},
425         {"kappa",       L'κ'},
426         {"koppa",       L'ϟ'},
427         {"lambda",      L'λ'},
428         {"lar",         L'←'},
429         {"lbar",        L'ł'},
430         {"le",          L'≦'},
431         {"lenis",       LLEN},
432         {"leo",         L'♌'},
433         {"lhalfbr",     L'⌈'},
434         {"lhshoe",      L'⊃'},
435         {"libra",       L'♎'},
436         {"llswing",     MLLS},
437         {"lm",          L'ː'},
438         {"logicand",    L'∧'},
439         {"logicor",     L'∨'},
440         {"longs",       L'ʃ'},
441         {"lrar",        L'↔'},
442         {"lt",          L'<'},
443         {"ltappr",      L'≾'},
444         {"ltflat",      L'∠'},
445         {"lumlbl",      L'l'},          /* +umlaut below */
446         {"mac",         LMAC},
447         {"male",        L'♂'},
448         {"mc",          L'c'},          /* should be raised */
449         {"merc",        L'☿'},                /* mercury U+263F */
450         {"min",         L'−'},
451         {"moonfq",      L'☽'},                /* first quarter moon U+263D */
452         {"moonlq",      L'☾'},                /* last quarter moon U+263E */
453         {"msylab",      L'm'},          /* +sylab (ˌ) */
454         {"mu",          L'μ'},
455         {"nacute",      L'ń'},
456         {"natural",     L'♮'},
457         {"neq",         L'≠'},
458         {"nfacute",     L'′'},
459         {"nfasper",     L'ʽ'},
460         {"nfbreve",     L'˘'},
461         {"nfced",       L'¸'},
462         {"nfcirc",      L'ˆ'},
463         {"nffrown",     L'⌢'},
464         {"nfgra",       L'ˋ'},
465         {"nfhacek",     L'ˇ'},
466         {"nfmac",       L'¯'},
467         {"nftilde",     L'˜'},
468         {"nfuml",       L'¨'},
469         {"ng",          L'ŋ'},
470         {"not",         L'¬'},
471         {"notelem",     L'∉'},
472         {"ntilde",      L'ñ'},
473         {"nu",          L'ν'},
474         {"oab",         L'〈'},
475         {"oacute",      L'ó'},
476         {"oasper",      MOAS},
477         {"ob",          L'{'},
478         {"obar",        L'ø'},
479         {"obigb",       L'{'},          /* should be big */
480         {"obigpren",    L'('},
481         {"obigsb",      L'['},          /* should be big */
482         {"obreve",      L'ŏ'},
483         {"ocirc",       L'ô'},
484         {"odsb",        L'〚'},                /* [[ U+301A */
485         {"oeamp",       L'&'},
486         {"oelig",               L'œ'},
487         {"ograve",      L'ò'},
488         {"ohook",       L'o'},          /* +hook */
489         {"olenis",      MOLN},
490         {"omacr",       L'ō'},
491         {"omega",       L'ω'},
492         {"omicron",     L'ο'},
493         {"ope",         L'ɛ'},
494         {"opp",         L'☍'},
495         {"oq",          L'`'},
496         {"oqq",         L'“'},
497         {"or",          MOR},
498         {"osb",         L'['},
499         {"otilde",      L'õ'},
500         {"ouml",        L'ö'},
501         {"ounce",       L'℥'},                /* ounce U+2125 */
502         {"ovparen",     L'⌢'},                /* should be sideways ( */
503         {"p",           L'′'},
504         {"pa",          L'∂'},
505         {"page",        L'P'},
506         {"pall",        L'ʎ'},
507         {"paln",        L'ɲ'},
508         {"par",         PAR},
509         {"para",        L'¶'},
510         {"pbar",        L'p'},          /* +bar */
511         {"per",         L'℘'},                /* per U+2118 */
512         {"phi",         L'φ'},
513         {"phi2",        L'ϕ'},
514         {"pi",          L'π'},
515         {"pisces",      L'♓'},
516         {"planck",      L'ħ'},
517         {"plantinJ",    L'J'},          /* should be script */
518         {"pm",          L'±'},
519         {"pmil",        L'‰'},
520         {"pp",          L'″'},
521         {"ppp",         L'‴'},
522         {"prop",        L'∝'},
523         {"psi",         L'ψ'},
524         {"pstlg",       L'£'},
525         {"q",           L'?'},          /* should be raised */
526         {"qamets",      L'ֳ'},         /* U+05B3 */
527         {"quaver",      L'♪'},
528         {"rar",         L'→'},
529         {"rasper",      MRAS},
530         {"rdot",        L'·'},
531         {"recipe",      L'℞'},                /* U+211E */
532         {"reg",         L'®'},
533         {"revC",        L'Ɔ'},         /* open O U+0186 */
534         {"reva",        L'ɒ'},
535         {"revc",        L'ɔ'},
536         {"revope",      L'ɜ'},
537         {"revr",        L'ɹ'},
538         {"revsc",       L'˒'},         /* upside-down semicolon */
539         {"revv",        L'ʌ'},
540         {"rfa",         L'o'},          /* +hook (Cf "goal") */
541         {"rhacek",      L'ř'},
542         {"rhalfbr",     L'⌉'},
543         {"rho",         L'ρ'},
544         {"rhshoe",      L'⊂'},
545         {"rlenis",      MRLN},
546         {"rsylab",      L'r'},          /* +sylab */
547         {"runash",      L'F'},          /* should be runic 'ash' */
548         {"rvow",        L'˔'},
549         {"sacute",      L'ś'},
550         {"sagit",       L'♐'},
551         {"sampi",       L'ϡ'},
552         {"saturn",      L'♄'},
553         {"sced",        L'ş'},
554         {"schwa",       L'ə'},
555         {"scorpio",     L'♏'},
556         {"scrA",        L'A'},          /* should be script */
557         {"scrC",        L'C'},
558         {"scrE",        L'E'},
559         {"scrF",        L'F'},
560         {"scrI",        L'I'},
561         {"scrJ",        L'J'},
562         {"scrL",        L'L'},
563         {"scrO",        L'O'},
564         {"scrP",        L'P'},
565         {"scrQ",        L'Q'},
566         {"scrS",        L'S'},
567         {"scrT",        L'T'},
568         {"scrb",        L'b'},
569         {"scrd",        L'd'},
570         {"scrh",        L'h'},
571         {"scrl",        L'l'},
572         {"scruple",     L'℈'},                /* U+2108 */
573         {"sdd",         L'ː'},
574         {"sect",        L'§'},
575         {"semE",        L'∃'},
576         {"sh",          L'ʃ'},
577         {"shacek",      L'š'},
578         {"sharp",       L'♯'},
579         {"sheva",       L'ְ'},         /* U+05B0 */
580         {"shti",        L'ɪ'},
581         {"shtsyll",     L'∪'},
582         {"shtu",        L'ʊ'},
583         {"sidetri",     L'⊲'},
584         {"sigma",       L'σ'},
585         {"since",       L'∵'},
586         {"slge",        L'≥'},                /* should have slanted line under */
587         {"slle",        L'≤'},                /* should have slanted line under */
588         {"sm",          L'ˈ'},
589         {"smm",         L'ˌ'},
590         {"spade",       L'♠'},
591         {"sqrt",        L'√'},
592         {"square",      L'□'},                /* U+25A1 */
593         {"ssChi",       L'Χ'},         /* should be sans serif */
594         {"ssIota",      L'Ι'},
595         {"ssOmicron",   L'Ο'},
596         {"ssPi",        L'Π'},
597         {"ssRho",       L'Ρ'},
598         {"ssSigma",     L'Σ'},
599         {"ssTau",       L'Τ'},
600         {"star",        L'*'},
601         {"stlig",       MLST},
602         {"sup2",        L'²'},
603         {"supgt",       L'˃'},
604         {"suplt",       L'˂'},
605         {"sur",         L'ʳ'},
606         {"swing",       L'∼'},
607         {"tau",         L'τ'},
608         {"taur",        L'♉'},
609         {"th",          L'þ'},
610         {"thbar",       L'þ'},         /* +bar */
611         {"theta",       L'θ'},
612         {"thinqm",      L'?'},          /* should be thinner */
613         {"tilde",       LTIL},
614         {"times",       L'×'},
615         {"tri",         L'∆'},
616         {"trli",        L'‖'},
617         {"ts",          L' '},
618         {"uacute",      L'ú'},
619         {"uasper",      MUAS},
620         {"ubar",        L'u'},          /* +bar */
621         {"ubreve",      L'ŭ'},
622         {"ucirc",       L'û'},
623         {"udA",         L'∀'},
624         {"udT",         L'⊥'},
625         {"uda",         L'ɐ'},
626         {"udh",         L'ɥ'},
627         {"udpsi",       L'⋔'},
628         {"udqm",        L'¿'},
629         {"udtr",        L'∇'},
630         {"ugrave",      L'ù'},
631         {"ulenis",      MULN},
632         {"umacr",       L'ū'},
633         {"uml",         LUML},
634         {"undl",        L'ˍ'},         /* underline accent */
635         {"union",       L'∪'},
636         {"upsilon",     L'υ'},
637         {"uuml",        L'ü'},
638         {"vavpath",     L'ו'},         /* vav U+05D5 (+patah) */
639         {"vavsheva",    L'ו'},         /* vav U+05D5 (+sheva) */
640         {"vb",          L'|'},
641         {"vddd",        L'⋮'},
642         {"versicle2",   L'℣'},                /* U+2123 */
643         {"vinc",        L'¯'},
644         {"virgo",       L'♍'},
645         {"vpal",        L'ɟ'},
646         {"vvf",         L'ɣ'},
647         {"wasper",      MWAS},
648         {"wavyeq",      L'≈'},
649         {"wlenis",      MWLN},
650         {"wyn",         L'ƿ'},         /* wynn U+01BF */
651         {"xi",          L'ξ'},
652         {"yacute",      L'ý'},
653         {"ycirc",       L'ŷ'},
654         {"ygh",         L'ʒ'},
655         {"ymacr",       L'y'},          /* +macron */
656         {"yuml",        L'ÿ'},
657         {"zced",        L'z'},          /* +cedilla */
658         {"zeta",        L'ζ'},
659         {"zh",          L'ʒ'},
660         {"zhacek",      L'ž'},
661 };
662 /*
663    The following special characters don't have close enough
664    equivalents in Unicode, so aren't in the above table.
665         22n             2^(2^n) Cf Fermat
666         2on4            2/4
667         3on8            3/8
668         Bantuo          Bantu O. Cf Otshi-herero
669         Car             C with circular arrow on top
670         albrtime        cut-time: C with vertical line
671         ardal           Cf dental
672         bantuo          Bantu o. Cf Otshi-herero
673         bbc1            single chem bond below
674         bbc2            double chem bond below
675         bbl1            chem bond like /
676         bbl2            chem bond like //
677         bbr1            chem bond like \
678         bbr2            chem bond \\
679         bcop1           copper symbol. Cf copper
680         bcop2           copper symbol. Cf copper
681         benchm          Cf benchmark
682         btc1            single chem bond above
683         btc2            double chem bond above
684         btl1            chem bond like \
685         btl2            chem bond like \\
686         btr1            chem bond like /
687         btr2            chem bond like //
688         burman          Cf Burman
689         devph           sanskrit letter. Cf ph
690         devrfls         sanskrit letter. Cf cerebral
691         duplong[12]     musical note
692         egchi           early form of chi
693         eggamma[12]     early form of gamma
694         egiota          early form of iota
695         egkappa         early form of kappa
696         eglambda        early form of lambda
697         egmu[12]        early form of mu
698         egnu[12]        early form of nu
699         egpi[123]       early form of pi
700         egrho[12]       early form of rho
701         egsampi         early form of sampi
702         egsan           early form of san
703         egsigma[12]     early form of sigma
704         egxi[123]       early form of xi
705         elatS           early form of S
706         elatc[12]       early form of C
707         elatg[12]       early form of G
708         glagjeri        Slavonic Glagolitic jeri
709         glagjeru        Slavonic Glagolitic jeru
710         hypolem         hypolemisk (line with underdot)
711         lhrbr           lower half }
712         longmord        long mordent
713         mbwvow          backwards scretched C. Cf retract.
714         mord            music symbol.  Cf mordent
715         mostra          Cf direct
716         ohgcirc         old form of circumflex
717         oldbeta         old form of β. Cf perturbate
718         oldsemibr[12]   old forms of semibreve. Cf prolation
719         ormg            old form of g. Cf G
720         para[12345]     form of ¶
721         pauseo          musical pause sign
722         pauseu          musical pause sign
723         pharyng         Cf pharyngal
724         ragr            Black letter ragged r
725         repetn          musical repeat. Cf retort
726         segno           musical segno sign
727         semain[12]      semitic ain
728         semhe           semitic he
729         semheth         semitic heth
730         semkaph         semitic kaph
731         semlamed[12]    semitic lamed
732         semmem          semitic mem
733         semnum          semitic nun
734         sempe           semitic pe
735         semqoph[123]    semitic qoph
736         semresh         semitic resh
737         semtav[1234]    semitic tav
738         semyod          semitic yod
739         semzayin[123]   semitic zayin
740         shtlong[12]     U with underbar. Cf glyconic
741         sigmatau        σ,τ combination
742         squaver         sixteenth note
743         sqbreve         square musical breve note
744         swast           swastika
745         uhrbr           upper half of big }
746         versicle1               Cf versicle
747  */
748
749
750 static Rune normtab[128] = {  /* indexed by 7-bit character code (row labels give the high hex digit); NONE, SPCS, TAGS and TAGE are defined outside this excerpt */
751         /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
752 /*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
753         NONE,   NONE,   ' ',    NONE,   NONE,   NONE,   NONE,   NONE,   /* 0x0A (newline) maps to a space */
754 /*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
755         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
756 /*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',  /* 0x26 '&' maps to SPCS */
757         L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
758 /*30*/  L'0',   L'1',   L'2',   L'3',   L'4',   L'5',   L'6',   L'7',
759         L'8',   L'9',   L':',   L';',   TAGS,   L'=',   TAGE,   L'?',   /* 0x3C '<' maps to TAGS, 0x3E '>' maps to TAGE */
760 /*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
761         L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
762 /*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
763         L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
764 /*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
765         L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
766 /*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
767         L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,   /* 0x7F (DEL) maps to NONE */
768 };
769 static Rune phtab[128] = {
770         /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
771 /*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
772         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
773 /*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
774         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
775 /*20*/  L' ',   L'!',   L'ˈ',  L'#',   L'$',   L'ˌ',  L'æ',  L'\'',
776         L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
777 /*30*/  L'0',   L'1',   L'2',   L'ɜ',  L'4',   L'5',   L'6',   L'7',
778         L'8',   L'ø',  L'ː',  L';',   TAGS,   L'=',   TAGE,   L'?',
779 /*40*/  L'ə',  L'ɑ',  L'B',   L'C',   L'ð',  L'ɛ',  L'F',   L'G',
780         L'H',   L'ɪ',  L'J',   L'K',   L'L',   L'M',   L'ŋ',  L'ɔ',
781 /*50*/  L'P',   L'ɒ',  L'R',   L'ʃ',  L'θ',  L'ʊ',  L'ʌ',  L'W',
782         L'X',   L'Y',   L'ʒ',  L'[',   L'\\',  L']',   L'^',   L'_',
783 /*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
784         L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
785 /*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
786         L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
787 };
788 static Rune grtab[128] = {
789         /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
790 /*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
791         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
792 /*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
793         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
794 /*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
795         L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
796 /*30*/  L'0',   L'1',   L'2',   L'3',   L'4',   L'5',   L'6',   L'7',
797         L'8',   L'9',   L':',   L';',   TAGS,   L'=',   TAGE,   L'?',
798 /*40*/  L'@',   L'Α',  L'Β',  L'Ξ',  L'Δ',  L'Ε',  L'Φ',  L'Γ',
799         L'Η',  L'Ι',  L'Ϛ',  L'Κ',  L'Λ',  L'Μ',  L'Ν',  L'Ο',
800 /*50*/  L'Π',  L'Θ',  L'Ρ',  L'Σ',  L'Τ',  L'Υ',  L'V',   L'Ω',
801         L'Χ',  L'Ψ',  L'Ζ',  L'[',   L'\\',  L']',   L'^',   L'_',
802 /*60*/  L'`',   L'α',  L'β',  L'ξ',  L'δ',  L'ε',  L'φ',  L'γ',
803         L'η',  L'ι',  L'ς',  L'κ',  L'λ',  L'μ',  L'ν',  L'ο',
804 /*70*/  L'π',  L'θ',  L'ρ',  L'σ',  L'τ',  L'υ',  L'v',   L'ω',
805         L'χ',  L'ψ',  L'ζ',  L'{',   L'|',   L'}',   L'~',   NONE,
806 };
807 static Rune subtab[128] = {
808         /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
809 /*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
810         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
811 /*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
812         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
813 /*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
814         L'₍', L'₎', L'*',   L'₊', L',',   L'₋', L'.',   L'/',
815 /*30*/  L'₀', L'₁', L'₂', L'₃', L'₄', L'₅', L'₆', L'₇',
816         L'₈', L'₉', L':',   L';',   TAGS,   L'₌', TAGE,   L'?',
817 /*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
818         L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
819 /*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
820         L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
821 /*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
822         L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
823 /*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
824         L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
825 };
826 static Rune suptab[128] = {
827         /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
828 /*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
829         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
830 /*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
831         NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
832 /*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   SPCS,   L'\'',
833         L'⁽', L'⁾', L'*',   L'⁺', L',',   L'⁻', L'.',   L'/',
834 /*30*/  L'⁰', L'¹',  L'²',  L'³',  L'⁴', L'⁵', L'⁶', L'⁷',
835         L'⁸', L'⁹', L':',   L';',   TAGS,   L'⁼', TAGE,   L'?',
836 /*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
837         L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
838 /*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
839         L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
840 /*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
841         L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
842 /*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
843         L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
844 };
845
846 static int      tagstarts;
847 static char     tag[Buflen];
848 static char     spec[Buflen];
849 static Entry    curentry;
850 #define cursize (curentry.end-curentry.start)
851
852 static char     *getspec(char *, char *);
853 static char     *gettag(char *, char *);
854
855 /*
856  * cmd is one of:
857  *    'p': normal print
858  *    'h': just print headwords
859  *    'P': print raw
860  */
861 void
862 pgwprintentry(Entry e, int cmd)
863 {
864         char *p, *pe;
865         int t;
866         long r, rprev, rlig;
867         Rune *transtab;
868
869         p = e.start;
870         pe = e.end;
871         transtab = normtab;
872         rprev = NONE;
873         changett(0, 0, 0);
874         curentry = e;
875         if(cmd == 'h')
876                 outinhibit = 1;
877         while(p < pe) {
878                 if(cmd == 'r') {
879                         outchar(*p++);
880                         continue;
881                 }
882                 r = transtab[(*p++)&0x7F];
883                 if(r < NONE) {
884                         /* Emit the rune, but buffer in case of ligature */
885                         if(rprev != NONE)
886                                 outrune(rprev);
887                         rprev = r;
888                 } else if(r == SPCS) {
889                         /* Start of special character name */
890                         p = getspec(p, pe);
891                         r = lookassoc(spectab, asize(spectab), spec);
892                         if(r == -1) {
893                                 if(debug)
894                                         err("spec %ld %d %s",
895                                                 e.doff, cursize, spec);
896                                 r = L'�';
897                         }
898                         if(r >= LIGS && r < LIGE) {
899                                 /* handle possible ligature */
900                                 rlig = liglookup(r, rprev);
901                                 if(rlig != NONE)
902                                         rprev = rlig;   /* overwrite rprev */
903                                 else {
904                                         /* could print accent, but let's not */
905                                         if(rprev != NONE) outrune(rprev);
906                                         rprev = NONE;
907                                 }
908                         } else if(r >= MULTI && r < MULTIE) {
909                                 if(rprev != NONE) {
910                                         outrune(rprev);
911                                         rprev = NONE;
912                                 }
913                                 outrunes(multitab[r-MULTI]);
914                         } else if(r == PAR) {
915                                 if(rprev != NONE) {
916                                         outrune(rprev);
917                                         rprev = NONE;
918                                 }
919                                 outnl(1);
920                         } else {
921                                 if(rprev != NONE) outrune(rprev);
922                                 rprev = r;
923                         }
924                 } else if(r == TAGS) {
925                         /* Start of tag name */
926                         if(rprev != NONE) {
927                                 outrune(rprev);
928                                 rprev = NONE;
929                         }
930                         p = gettag(p, pe);
931                         t = lookassoc(tagtab, asize(tagtab), tag);
932                         if(t == -1) {
933                                 if(debug)
934                                         err("tag %ld %d %s",
935                                                 e.doff, cursize, tag);
936                                 continue;
937                         }
938                         switch(t){
939                         case Hw:
940                                 if(cmd == 'h') {
941                                         if(!tagstarts)
942                                                 outchar(' ');
943                                         outinhibit = !tagstarts;
944                                 }
945                                 break;
946                         case Sn:
947                                 if(tagstarts) {
948                                         outnl(2);
949                                 }
950                                 break;
951                         case P:
952                                 outnl(tagstarts);
953                                 break;
954                         case Col:
955                         case Br:
956                         case Blockquote:
957                                 if(tagstarts)
958                                         outnl(1);
959                                 break;
960                         case U:
961                                 outchar('/');
962                         }
963                 }
964         }
965         if(cmd == 'h') {
966                 outinhibit = 0;
967                 outnl(0);
968         }
969 }
970
971 /*
972  * Return offset into bdict where next webster entry after fromoff starts.
973  * Webster entries start with <p><hw>
974  */
975 long
976 pgwnextoff(long fromoff)
977 {
978         long a, n;
979         int c;
980
981         a = Bseek(bdict, fromoff, 0);
982         if(a != fromoff)
983                 return -1;
984         n = 0;
985         for(;;) {
986                 c = Bgetc(bdict);
987                 if(c < 0)
988                         break;
989                 if(c == '<' && Bgetc(bdict) == 'p' && Bgetc(bdict) == '>') {
990                         c = Bgetc(bdict);
991                         if(c == '<') {
992                                 if (Bgetc(bdict) == 'h' && Bgetc(bdict) == 'w' 
993                                         && Bgetc(bdict) == '>')
994                                                 n = 7;
995                         }else if (c == '{')
996                                 n = 4;
997                         if(n)
998                                 break;
999                 }
1000         }
1001         return (Boffset(bdict)-n);
1002 }
1003
/*
 * Text of the pronunciation key, written verbatim by pgwprintkey.
 */
static char *prkey =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h  ...  ho! (həʊ)\n"
"r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)...  her (hɜː(r))\n"
"s  ...  see (siː), success (səkˈsɜs)\n"
"w  ...  wear (wɛə(r))\n"
"hw ...  when (hwɛn)\n"
"j  ...  yes (jɛs)\n"
"θ  ...  thin (θin), bath (bɑːθ)\n"
"ð  ...  then (ðɛn), bathe (beɪð)\n"
"ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
"dʒ ...  judge (dʒʌdʒ)\n"
"ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ...  finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ  ...  Fr. cognac (kɔɲak)\n"
"x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
"ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
"c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ  ...  Fr. cuisine (kɥizin)\n"
"\n"
"II. VOWELS AND DIPHTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
"æ  ...  pat (pæt)\n"
"ʌ  ...  putt (pʌt)\n"
"ɒ  ...  pot (pɒt)\n"
"ʊ  ...  put (pʊt)\n"
"ə  ...  another (əˈnʌðə(r))\n"
"(ə)...  beaten (ˈbiːt(ə)n)\n"
"i  ...  Fr. si (si)\n"
"e  ...  Fr. bébé (bebe)\n"
"a  ...  Fr. mari (mari)\n"
"ɑ  ...  Fr. bâtiment (bɑtimã)\n"
"ɔ  ...  Fr. homme (ɔm)\n"
"o  ...  Fr. eau (o)\n"
"ø  ...  Fr. peu (pø)\n"
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã  ...  Fr. franc (frã)\n"
"ɔ˜ ...  Fr. bon (bɔ˜n)\n"
"œ˜ ...  Fr. un (œ˜)\n"
"\n"
"Diphthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ...  buy (baɪ)\n"
"ɔɪ ...  boy (bɔɪ)\n"
"əʊ ...  no (nəʊ)\n"
"aʊ ...  now (naʊ)\n"
"ɪə ...  peer (pɪə(r))\n"
"ɛə ...  pair (pɛə(r))\n"
"ʊə ...  tour (tʊə(r))\n"
"ɔə ...  boar (bɔə(r))\n"
"\n"
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
1096
1097 void
1098 pgwprintkey(void)
1099 {
1100         Bprint(bout, "%s", prkey);
1101 }
1102
1103 /*
1104  * f points just after a '&', fe points at end of entry.
1105  * Accumulate the special name, starting after the &
1106  * and continuing until the next ';', in spec[].
1107  * Return pointer to char after ';'.
1108  */
1109 static char *
1110 getspec(char *f, char *fe)
1111 {
1112         char *t;
1113         int c, i;
1114
1115         t = spec;
1116         i = sizeof spec;
1117         while(--i > 0) {
1118                 c = *f++;
1119                 if(c == ';' || f == fe)
1120                         break;
1121                 *t++ = c;
1122         }
1123         *t = 0;
1124         return f;
1125 }
1126
1127 /*
1128  * f points just after '<'; fe points at end of entry.
1129  * Expect next characters from bin to match:
1130  *  [/][^ >]+( [^>=]+=[^ >]+)*>
1131  *      tag   auxname auxval
1132  * Accumulate the tag and its auxilliary information in
1133  * tag[], auxname[][] and auxval[][].
1134  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
1135  * Set naux to the number of aux pairs found.
1136  * Return pointer to after final '>'.
1137  */
1138 static char *
1139 gettag(char *f, char *fe)
1140 {
1141         char *t;
1142         int c, i;
1143
1144         t = tag;
1145         c = *f++;
1146         if(c == '/')
1147                 tagstarts = 0;
1148         else {
1149                 tagstarts = 1;
1150                 *t++ = c;
1151         }
1152         i = Buflen;
1153         while(--i > 0) {
1154                 c = *f++;
1155                 if(c == '>' || f == fe)
1156                         break;
1157                 *t++ = c;
1158         }
1159         *t = 0;
1160         return f;
1161 }