]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/dict/pcollins.c
cc, ?[acl]: fix gethunk() and move common memory allocator code to cc/compat
[plan9front.git] / sys / src / cmd / dict / pcollins.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5
6 /*
7  * Routines for handling dictionaries in the "Paperback Collins"
8  * format (with tags surrounded by >....<)
9  */
enum {
        Buflen=1000,    /* size in bytes of the tag[] scratch buffer filled by gettag */
};
13
/*
 * More special runes: private tag codes numbered just past MULTIE.
 * pcollprintentry acts on H, X, Ps and Pe; B, I and R match no case
 * in its switch, so they only flush the pending rune.
 */
enum {
        B = MULTIE+1,   /* bold */
        H,              /* headword start */
        I,              /* italics */
        Ps,             /* pronunciation start */
        Pe,             /* pronunciation end */
        R,              /* roman */
        X,              /* headword end */
};
24
/*
 * Assoc tables must be sorted on first field.
 *
 * tagtab maps the text of a tag (the bytes between '>' and '<') to the
 * rune or special code that pcollprintentry acts on; it is searched
 * with lookassoc.  Keys are in byte order, so upper case sorts before
 * '[' and ']', which sort before lower case, '{' and '~'.
 */

static Assoc tagtab[] = {
        {"AA",          L'Å'},
        {"AC",          LACU},
        {"B",           B},
        {"CE",          LCED},
        {"CI",          LFRN},
        {"Di",          L'ı'},
        {"EL",          L'-'},
        {"GR",          LGRV},
        {"H",           H},
        {"I",           I},
        {"OE",          L'Œ'},
        {"R",           R},
        {"TI",          LTIL},
        {"UM",          LUML},
        {"X",           X},
        {"[",           Ps},
        {"]",           Pe},
        {"ac",          LACU},
        {"ce",          LCED},
        {"ci",          LFRN},
        {"gr",          LGRV},
        {"oe",          L'œ'},
        {"supe",        L'e'},          /* should be raised */
        {"supo",        L'o'},          /* should be raised */
        {"ti",          LTIL},
        {"um",          LUML},
        {"{",           Ps},
        {"~",           L'~'},
        {"~~",          MTT},
};
58
/*
 * Translation table used by pcollprintentry, indexed by the low seven
 * bits of each input byte.  Newline (0x0A) becomes a space, '>' (0x3E)
 * is TAGS and '<' (0x3C) is TAGE; the other printable ASCII characters
 * map to themselves.  pcollprintentry emits nothing for NONE or TAGE.
 */
static Rune normtab[128] = {
        /*0*/   /*1*/   /*2*/   /*3*/   /*4*/   /*5*/   /*6*/   /*7*/
/*00*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   L' ',   NONE,   NONE,   NONE,   NONE,   NONE,
/*10*/  NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
        NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,   NONE,
/*20*/  L' ',   L'!',   L'"',   L'#',   L'$',   L'%',   L'&',   L'\'',
        L'(',   L')',   L'*',   L'+',   L',',   L'-',   L'.',   L'/',
/*30*/  L'0',   L'1',   L'2',   L'3',   L'4',   L'5',   L'6',   L'7',
        L'8',   L'9',   L':',   L';',   TAGE,   L'=',   TAGS,   L'?',
/*40*/  L'@',   L'A',   L'B',   L'C',   L'D',   L'E',   L'F',   L'G',
        L'H',   L'I',   L'J',   L'K',   L'L',   L'M',   L'N',   L'O',
/*50*/  L'P',   L'Q',   L'R',   L'S',   L'T',   L'U',   L'V',   L'W',
        L'X',   L'Y',   L'Z',   L'[',   L'\\',  L']',   L'^',   L'_',
/*60*/  L'`',   L'a',   L'b',   L'c',   L'd',   L'e',   L'f',   L'g',
        L'h',   L'i',   L'j',   L'k',   L'l',   L'm',   L'n',   L'o',
/*70*/  L'p',   L'q',   L'r',   L's',   L't',   L'u',   L'v',   L'w',
        L'x',   L'y',   L'z',   L'{',   L'|',   L'}',   L'~',   NONE,
};
78
/* Reads one tag into tag[]; defined at the end of this file. */
static char *gettag(char *, char *);

/* Entry most recently passed to pcollprintentry. */
static Entry    curentry;
/* NUL-terminated text of the tag most recently read by gettag. */
static char     tag[Buflen];
/* Length in bytes of curentry; used in the unknown-tag debug message. */
#define cursize (curentry.end-curentry.start)
84
85 void
86 pcollprintentry(Entry e, int cmd)
87 {
88         char *p, *pe;
89         long r, rprev, t, rlig;
90         int saveoi;
91         Rune *transtab;
92
93         p = e.start;
94         pe = e.end;
95         transtab = normtab;
96         rprev = NONE;
97         changett(0, 0, 0);
98         curentry = e;
99         saveoi = 0;
100         if(cmd == 'h')
101                 outinhibit = 1;
102         while(p < pe) {
103                 if(cmd == 'r') {
104                         outchar(*p++);
105                         continue;
106                 }
107                 r = transtab[(*p++)&0x7F];
108                 if(r < NONE) {
109                         /* Emit the rune, but buffer in case of ligature */
110                         if(rprev != NONE)
111                                 outrune(rprev);
112                         rprev = r;
113                 } else if(r == TAGS) {
114                         p = gettag(p, pe);
115                         t = lookassoc(tagtab, asize(tagtab), tag);
116                         if(t == -1) {
117                                 if(debug && !outinhibit)
118                                         err("tag %ld %d %s",
119                                                 e.doff, cursize, tag);
120                                 continue;
121                         }
122                         if(t < NONE) {
123                                 if(rprev != NONE)
124                                         outrune(rprev);
125                                 rprev = t;
126                         } else if(t >= LIGS && t < LIGE) {
127                                 /* handle possible ligature */
128                                 rlig = liglookup(t, rprev);
129                                 if(rlig != NONE)
130                                         rprev = rlig;   /* overwrite rprev */
131                                 else {
132                                         /* could print accent, but let's not */
133                                         if(rprev != NONE) outrune(rprev);
134                                         rprev = NONE;
135                                 }
136                         } else if(t >= MULTI && t < MULTIE) {
137                                 if(rprev != NONE) {
138                                         outrune(rprev);
139                                         rprev = NONE;
140                                 }
141                                 outrunes(multitab[t-MULTI]);
142                         } else {
143                                 if(rprev != NONE) {
144                                         outrune(rprev);
145                                         rprev = NONE;
146                                 }
147                                 switch(t){
148                                 case H:
149                                         if(cmd == 'h')
150                                                 outinhibit = 0;
151                                         else
152                                                 outnl(0);
153                                         break;
154                                 case X:
155                                         if(cmd == 'h')
156                                                 outinhibit = 1;
157                                         else
158                                                 outchars(".  ");
159                                         break;
160                                 case Ps:
161                                         /* don't know enough of pron. key yet */
162                                         saveoi = outinhibit;
163                                         outinhibit = 1;
164                                         break;
165                                 case Pe:
166                                         outinhibit = saveoi;
167                                         break;
168                                 }
169                         }
170                 }
171         }
172         if(cmd == 'h')
173                 outinhibit = 0;
174         outnl(0);
175 }
176
177 long
178 pcollnextoff(long fromoff)
179 {
180         long a;
181         char *p;
182
183         a = Bseek(bdict, fromoff, 0);
184         if(a < 0)
185                 return -1;
186         for(;;) {
187                 p = Brdline(bdict, '\n');
188                 if(!p)
189                         break;
190                 if(p[0] == '>' && p[1] == 'H' && p[2] == '<')
191                         return (Boffset(bdict)-Blinelen(bdict));
192         }
193         return -1;
194 }
195
196 void
197 pcollprintkey(void)
198 {
199         Bprint(bout, "No pronunciation key yet\n");
200 }
201
202 /*
203  * f points just after '>'; fe points at end of entry.
204  * Expect next characters from bin to match:
205  *  [^ <]+<
206  *     tag
207  * Accumulate the tag in tag[].
208  * Return pointer to after final '<'.
209  */
210 static char *
211 gettag(char *f, char *fe)
212 {
213         char *t;
214         int c, i;
215
216         t = tag;
217         i = Buflen;
218         while(--i > 0) {
219                 c = *f++;
220                 if(c == '<' || f == fe)
221                         break;
222                 *t++ = c;
223         }
224         *t = 0;
225         return f;
226 }