]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/tr.c
adea05c25bdbad8d5fa608d500e8de2304d6e97f
[plan9front.git] / sys / src / cmd / tr.c
1 #include        <u.h>
2 #include        <libc.h>
3
/*
 * Parse control block: tracks progress through one command-line
 * specification string while canon() expands it rune by rune.
 */
typedef struct PCB Pcb;
struct PCB
{
	char	*base;		/* first byte of the specification */
	char	*current;	/* next byte to be parsed */
	long	last;		/* most recent rune handed out, -1 if none */
	long	final;		/* upper end of the span being expanded, -1 if none */
};
11
12 uchar   bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
13
14 #define SETBIT(a, c)            ((a)[(c)/8] |= bits[(c)&07])
15 #define CLEARBIT(a,c)           ((a)[(c)/8] &= ~bits[(c)&07])
16 #define BITSET(a,c)             ((a)[(c)/8] & bits[(c)&07])
17
18 #define MAXRUNE Runemax
19
20 uchar   f[(MAXRUNE+1)/8];
21 uchar   t[(MAXRUNE+1)/8];
22 char    wbuf[4096];
23 char    *wptr;
24
25 Pcb pfrom, pto;
26
27 int cflag;
28 int dflag;
29 int sflag;
30
31 void    complement(void);
32 void    delete(void);
33 void    squeeze(void);
34 void    translit(void);
35 long    canon(Pcb*);
36 char    *getrune(char*, Rune*);
37 void    Pinit(Pcb*, char*);
38 void    Prewind(Pcb *p);
39 int     readrune(int, long*);
40 void    wflush(int);
41 void    writerune(int, Rune);
42
43 static void
44 usage(void)
45 {
46         fprint(2, "usage: %s [-cds] [string1 [string2]]\n", argv0);
47         exits("usage");
48 }
49
50 void
51 main(int argc, char **argv)
52 {
53         ARGBEGIN{
54         case 's':       sflag++; break;
55         case 'd':       dflag++; break;
56         case 'c':       cflag++; break;
57         default:        usage();
58         }ARGEND
59         if(argc>0)
60                 Pinit(&pfrom, argv[0]);
61         if(argc>1)
62                 Pinit(&pto, argv[1]);
63         if(argc>2)
64                 usage();
65         if(dflag) {
66                 if ((sflag && argc != 2) || (!sflag && argc != 1))
67                         usage();
68                 delete();
69         } else {
70                 if (argc != 2)
71                         usage();
72                 if (cflag)
73                         complement();
74                 else translit();
75         }
76         exits(0);
77 }
78
79 void
80 delete(void)
81 {
82         long c, last;
83
84         if (cflag) {
85                 memset((char *) f, 0xff, sizeof f);
86                 while ((c = canon(&pfrom)) >= 0)
87                         CLEARBIT(f, c);
88         } else {
89                 while ((c = canon(&pfrom)) >= 0)
90                         SETBIT(f, c);
91         }
92         if (sflag) {
93                 while ((c = canon(&pto)) >= 0)
94                         SETBIT(t, c);
95         }
96
97         last = 0x10000;
98         while (readrune(0, &c) > 0) {
99                 if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
100                         last = c;
101                         writerune(1, (Rune) c);
102                 }
103         }
104         wflush(1);
105 }
106
107 void
108 complement(void)
109 {
110         Rune *p;
111         int i;
112         long from, to, lastc, high;
113
114         lastc = 0;
115         high = 0;
116         while ((from = canon(&pfrom)) >= 0) {
117                 if (from > high) high = from;
118                 SETBIT(f, from);
119         }
120         while ((to = canon(&pto)) > 0) {
121                 if (to > high) high = to;
122                 SETBIT(t,to);
123         }
124         Prewind(&pto);
125         if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
126                 sysfatal("no memory");
127         for (i = 0; i <= high; i++){
128                 if (!BITSET(f,i)) {
129                         if ((to = canon(&pto)) < 0)
130                                 to = lastc;
131                         else lastc = to;
132                         p[i] = to;
133                 }
134                 else p[i] = i;
135         }
136         if (sflag){
137                 lastc = 0x10000;
138                 while (readrune(0, &from) > 0) {
139                         if (from > high)
140                                 from = to;
141                         else
142                                 from = p[from];
143                         if (from != lastc || !BITSET(t,from)) {
144                                 lastc = from;
145                                 writerune(1, (Rune) from);
146                         }
147                 }
148                                 
149         } else {
150                 while (readrune(0, &from) > 0){
151                         if (from > high)
152                                 from = to;
153                         else
154                                 from = p[from];
155                         writerune(1, (Rune) from);
156                 }
157         }
158         wflush(1);
159 }
160
161 void
162 translit(void)
163 {
164         Rune *p;
165         int i;
166         long from, to, lastc, high;
167
168         lastc = 0;
169         high = 0;
170         while ((from = canon(&pfrom)) >= 0)
171                 if (from > high) high = from;
172         Prewind(&pfrom);
173         if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
174                 sysfatal("no memory");
175         for (i = 0; i <= high; i++)
176                 p[i] = i;
177         while ((from = canon(&pfrom)) >= 0) {
178                 if ((to = canon(&pto)) < 0)
179                         to = lastc;
180                 else lastc = to;
181                 if (BITSET(f,from) && p[from] != to)
182                         sysfatal("ambiguous translation");
183                 SETBIT(f,from);
184                 p[from] = to;
185                 SETBIT(t,to);
186         }
187         while ((to = canon(&pto)) >= 0) {
188                 SETBIT(t,to);
189         }
190         if (sflag){
191                 lastc = 0x10000;
192                 while (readrune(0, &from) > 0) {
193                         if (from <= high)
194                                 from = p[from];
195                         if (from != lastc || !BITSET(t,from)) {
196                                 lastc = from;
197                                 writerune(1, (Rune) from);
198                         }
199                 }
200                                 
201         } else {
202                 while (readrune(0, &from) > 0) {
203                         if (from <= high)
204                                 from = p[from];
205                         writerune(1, (Rune) from);
206                 }
207         }
208         wflush(1);
209 }
210
211 int
212 readrune(int fd, long *rp)
213 {
214         Rune r;
215         int j;
216         static int i, n;
217         static char buf[4096];
218
219         j = i;
220         for (;;) {
221                 if (i >= n) {
222                         wflush(1);
223                         if (j != i)
224                                 memcpy(buf, buf+j, n-j);
225                         i = n-j;
226                         n = read(fd, &buf[i], sizeof(buf)-i);
227                         if (n < 0)
228                                 sysfatal("read error: %r");
229                         if (n == 0)
230                                 return 0;
231                         j = 0;
232                         n += i;
233                 }
234                 i++;
235                 if (fullrune(&buf[j], i-j))
236                         break;
237         }
238         chartorune(&r, &buf[j]);
239         *rp = r;
240         return 1;
241 }
242
243 void
244 writerune(int fd, Rune r)
245 {
246         char buf[UTFmax];
247         int n;
248
249         if (!wptr)
250                 wptr = wbuf;
251         n = runetochar(buf, (Rune*)&r);
252         if (wptr+n >= wbuf+sizeof(wbuf))
253                 wflush(fd);
254         memcpy(wptr, buf, n);
255         wptr += n;
256 }
257
258 void
259 wflush(int fd)
260 {
261         if (wptr && wptr > wbuf)
262                 if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
263                         sysfatal("write error: %r");
264         wptr = wbuf;
265 }
266
267 char *
268 getrune(char *s, Rune *rp)
269 {
270         Rune r;
271         char *save;
272         int i, n;
273
274         s += chartorune(rp, s);
275         if((r = *rp) == '\\' && *s){
276                 n = 0;
277                 if (*s == 'x') {
278                         s++;
279                         for (i = 0; i < 4; i++) {
280                                 save = s;
281                                 s += chartorune(&r, s);
282                                 if ('0' <= r && r <= '9')
283                                         n = 16*n + r - '0';
284                                 else if ('a' <= r && r <= 'f')
285                                         n = 16*n + r - 'a' + 10;
286                                 else if ('A' <= r && r <= 'F')
287                                         n = 16*n + r - 'A' + 10;
288                                 else {
289                                         if (i == 0)
290                                                 *rp = 'x';
291                                         else *rp = n;
292                                         return save;
293                                 }
294                         }
295                 } else {
296                         for(i = 0; i < 3; i++) {
297                                 save = s;
298                                 s += chartorune(&r, s);
299                                 if('0' <= r && r <= '7')
300                                         n = 8*n + r - '0';
301                                 else {
302                                         if (i == 0)
303                                         {
304                                                 *rp = r;
305                                                 return s;
306                                         }
307                                         *rp = n;
308                                         return save;
309                                 }
310                         }
311                         if(n > 0377)
312                                 sysfatal("character > 0377");
313                 }
314                 *rp = n;
315         }
316         return s;
317 }
318
319 long
320 canon(Pcb *p)
321 {
322         Rune r;
323
324         if (p->final >= 0) {
325                 if (p->last < p->final)
326                         return ++p->last;
327                 p->final = -1;
328         }
329         if (*p->current == '\0')
330                 return -1;
331         if(*p->current == '-' && p->last >= 0 && p->current[1]){
332                 p->current = getrune(p->current+1, &r);
333                 if (r < p->last)
334                         sysfatal("invalid range specification");
335                 if (r > p->last) {
336                         p->final = r;
337                         return ++p->last;
338                 }
339         }
340         p->current = getrune(p->current, &r);
341         p->last = r;
342         return p->last;
343 }
344
345 void
346 Pinit(Pcb *p, char *cp)
347 {
348         p->current = p->base = cp;
349         p->last = p->final = -1;
350 }
351 void
352 Prewind(Pcb *p)
353 {
354         p->current = p->base;
355         p->last = p->final = -1;
356 }