git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/acid/lex.c
acid: fix cvtitoa buffer overflow
[plan9front.git] / sys / src / cmd / acid / lex.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6 #define Extern extern
7 #include "acid.h"
8 #include "y.tab.h"
9
10 struct keywd
11 {
12         char    *name;
13         int     terminal;
14 }
15 keywds[] =
16 {
17         "do",           Tdo,
18         "if",           Tif,
19         "then",         Tthen,
20         "else",         Telse,
21         "while",        Twhile,
22         "loop",         Tloop,
23         "head",         Thead,
24         "tail",         Ttail,
25         "append",       Tappend,
26         "defn",         Tfn,
27         "return",       Tret,
28         "local",        Tlocal,
29         "aggr",         Tcomplex,
30         "union",        Tcomplex,
31         "adt",          Tcomplex,
32         "complex",      Tcomplex,
33         "delete",       Tdelete,
34         "whatis",       Twhat,
35         "eval",         Teval,
36         "builtin",      Tbuiltin,
37         0,              0
38 };
39
/*
 * Backslash escape table, indexed by the character that
 * follows the backslash.  Each entry holds the character
 * the escape stands for plus one, so that a zero entry
 * means "no translation"; escchar() subtracts the one
 * again before returning.
 */
char cmap[256] =
{
	/* quoting characters */
	['"']	'"'+1,
	['\\']	'\\'+1,

	/* control characters */
	['a']	'\a'+1,
	['b']	'\b'+1,
	['f']	'\f'+1,
	['n']	'\n'+1,
	['r']	'\r'+1,
	['t']	'\t'+1,
	['v']	'\v'+1,

	/* NUL */
	['0']	'\0'+1,
};
53
54 void
55 kinit(void)
56 {
57         int i;
58         
59         for(i = 0; keywds[i].name; i++) 
60                 enter(keywds[i].name, keywds[i].terminal);
61 }
62
63 typedef struct IOstack IOstack;
64 struct IOstack
65 {
66         char    *name;
67         int     line;
68         char    *text;
69         char    *ip;
70         Biobuf  *fin;
71         IOstack *prev;
72 };
73 IOstack *lexio;
74
75 void
76 pushfile(char *file)
77 {
78         Biobuf *b;
79         IOstack *io;
80
81         if(file)
82                 b = Bopen(file, OREAD);
83         else{
84                 b = Bopen("/fd/0", OREAD);
85                 file = "<stdin>";
86         }
87
88         if(b == 0)
89                 error("pushfile: %s: %r", file);
90
91         io = malloc(sizeof(IOstack));
92         if(io == 0)
93                 fatal("no memory");
94         io->name = strdup(file);
95         if(io->name == 0)
96                 fatal("no memory");
97         io->line = line;
98         line = 1;
99         io->text = 0;
100         io->fin = b;
101         io->prev = lexio;
102         lexio = io;
103 }
104
105 void
106 pushstr(Node *s)
107 {
108         IOstack *io;
109
110         io = malloc(sizeof(IOstack));
111         if(io == 0)
112                 fatal("no memory");
113         io->line = line;
114         line = 1;
115         io->name = strdup("<string>");
116         if(io->name == 0)
117                 fatal("no memory");
118         io->line = line;
119         line = 1;
120         io->text = strdup(s->string->string);
121         if(io->text == 0)
122                 fatal("no memory");
123         io->ip = io->text;
124         io->fin = 0;
125         io->prev = lexio;
126         lexio = io;
127 }
128
129 void
130 restartio(void)
131 {
132         Bflush(lexio->fin);
133         Binit(lexio->fin, 0, OREAD);
134 }
135
136 int
137 popio(void)
138 {
139         IOstack *s;
140
141         if(lexio == 0)
142                 return 0;
143
144         if(lexio->prev == 0){
145                 if(lexio->fin)
146                         restartio();
147                 return 0;
148         }
149
150         if(lexio->fin)
151                 Bterm(lexio->fin);
152         else
153                 free(lexio->text);
154         free(lexio->name);
155         line = lexio->line;
156         s = lexio;
157         lexio = s->prev;
158         free(s);
159         return 1;
160 }
161
162 int
163 Lfmt(Fmt *f)
164 {
165         int i;
166         char buf[1024];
167         IOstack *e;
168
169         e = lexio;
170         if(e) {
171                 i = snprint(buf, sizeof(buf), "%s:%d", e->name, line);
172                 while(e->prev) {
173                         e = e->prev;
174                         if(initialising && e->prev == 0)
175                                 break;
176                         i += snprint(buf+i, sizeof(buf)-i, " [%s:%d]", e->name, e->line);
177                 }
178         } else
179                 snprint(buf, sizeof(buf),  "no file:0");
180         fmtstrcpy(f, buf);
181         return 0;
182 }
183
184 void
185 unlexc(int s)
186 {
187         if(s == '\n')
188                 line--;
189
190         if(lexio->fin)
191                 Bungetc(lexio->fin);
192         else
193                 lexio->ip--;
194 }
195
196 int
197 lexc(void)
198 {
199         int c;
200
201         if(lexio->fin) {
202                 c = Bgetc(lexio->fin);
203                 if(gotint)
204                         error("interrupt");
205                 return c;
206         }
207
208         c = *lexio->ip++;
209         if(c == 0)
210                 return -1;
211         return c;
212 }
213
214 int
215 escchar(char c)
216 {
217         int n;
218         char buf[Strsize];
219
220         if(c >= '0' && c <= '9') {
221                 n = 1;
222                 buf[0] = c;
223                 for(;;) {
224                         c = lexc();
225                         if(c == Eof)
226                                 error("%d: <eof> in escape sequence", line);
227                         if(strchr("0123456789xX", c) == 0) {
228                                 unlexc(c);
229                                 break;
230                         }
231                         if(n >= Strsize)
232                                 error("string escape too long");
233                         buf[n++] = c;
234                 }
235                 buf[n] = '\0';
236                 return strtol(buf, 0, 0);
237         }
238
239         n = cmap[c];
240         if(n == 0)
241                 return c;
242         return n-1;
243 }
244
245 void
246 eatstring(void)
247 {
248         int esc, c, cnt;
249         char buf[Strsize];
250
251         esc = 0;
252         for(cnt = 0;;) {
253                 c = lexc();
254                 switch(c) {
255                 case Eof:
256                         error("%d: <eof> in string constant", line);
257
258                 case '\n':
259                         error("newline in string constant");
260                         goto done;
261
262                 case '\\':
263                         if(esc)
264                                 goto Default;
265                         esc = 1;
266                         break;
267
268                 case '"':
269                         if(esc == 0)
270                                 goto done;
271
272                         /* Fall through */
273                 default:
274                 Default:
275                         if(esc) {
276                                 c = escchar(c);
277                                 esc = 0;
278                         }
279                         buf[cnt++] = c;
280                         break;
281                 }
282                 if(cnt >= Strsize)
283                         error("string token too long");
284         }
285 done:
286         buf[cnt] = '\0';
287         yylval.string = strnode(buf);
288 }
289
290 void
291 eatnl(void)
292 {
293         int c;
294
295         line++;
296         for(;;) {
297                 c = lexc();
298                 if(c == Eof)
299                         error("eof in comment");
300                 if(c == '\n')
301                         return;
302         }
303 }
304
305 int
306 yylex(void)
307 {
308         int c;
309         extern char vfmt[];
310
311 loop:
312         Bflush(bout);
313         c = lexc();
314         switch(c) {
315         case Eof:
316                 if(gotint) {
317                         gotint = 0;
318                         stacked = 0;
319                         Bprint(bout, "\nacid: ");
320                         goto loop;
321                 }
322                 return Eof;
323
324         case '"':
325                 eatstring();
326                 return Tstring;
327
328         case ' ':
329         case '\t':
330                 goto loop;
331
332         case '/':
333                 c = lexc();
334                 if(c != '/'){
335                         unlexc(c);
336                         return '/';
337                 }
338                 eatnl();
339
340         case '\n':
341                 line++;
342                 if(interactive == 0)
343                         goto loop;
344                 if(stacked) {
345                         print("\t");
346                         goto loop;
347                 }
348                 return ';';
349
350         case '.':
351                 c = lexc();
352                 unlexc(c);
353                 if(isdigit(c))
354                         return numsym('.');
355
356                 return '.';
357  
358         case '(':
359         case ')':
360         case '[':
361         case ']':
362         case ';':
363         case ':':
364         case ',':
365         case '~':
366         case '?':
367         case '*':
368         case '@':
369         case '^':
370         case '%':
371                 return c;
372         case '{':
373                 stacked++;
374                 return c;
375         case '}':
376                 stacked--;
377                 return c;
378
379         case '\\':
380                 c = lexc();
381                 if(strchr(vfmt, c) == 0) {
382                         unlexc(c);
383                         return '\\';
384                 }
385                 yylval.ival = c;
386                 return Tfmt;
387
388         case '!':
389                 c = lexc();
390                 if(c == '=')
391                         return Tneq;
392                 unlexc(c);
393                 return '!';
394
395         case '+':
396                 c = lexc();
397                 if(c == '+')
398                         return Tinc;
399                 unlexc(c);
400                 return '+';
401
402         case '\'':
403                 c = lexc();
404                 if(c == '\\')
405                         yylval.ival = escchar(lexc());
406                 else
407                         yylval.ival = c;
408                 c = lexc();
409                 if(c != '\'') {
410                         error("missing '");
411                         unlexc(c);
412                 }
413                 return Tconst;
414
415         case '&':
416                 c = lexc();
417                 if(c == '&')
418                         return Tandand;
419                 unlexc(c);
420                 return '&';
421
422         case '=':
423                 c = lexc();
424                 if(c == '=')
425                         return Teq;
426                 unlexc(c);
427                 return '=';
428
429         case '|':
430                 c = lexc();
431                 if(c == '|')
432                         return Toror;
433                 unlexc(c);
434                 return '|';
435
436         case '<':
437                 c = lexc();
438                 if(c == '=')
439                         return Tleq;
440                 if(c == '<')
441                         return Tlsh;
442                 unlexc(c);
443                 return '<';
444
445         case '>':
446                 c = lexc();
447                 if(c == '=')
448                         return Tgeq;
449                 if(c == '>')
450                         return Trsh;
451                 unlexc(c);
452                 return '>';
453
454         case '-':
455                 c = lexc();
456
457                 if(c == '>')
458                         return Tindir;
459
460                 if(c == '-')
461                         return Tdec;
462                 unlexc(c);
463                 return '-';
464
465         default:
466                 return numsym(c);
467         }
468 }
469
470 int
471 numsym(char first)
472 {
473         int c, isbin, isfloat, ishex;
474         char *sel, *p;
475         Lsym *s;
476
477         symbol[0] = first;
478         p = symbol;
479
480         ishex = 0;
481         isbin = 0;
482         isfloat = 0;
483         if(first == '.')
484                 isfloat = 1;
485
486         if(isdigit(*p++) || isfloat) {
487                 for(;;) {
488                         c = lexc();
489                         if(c < 0)
490                                 error("%d: <eof> eating symbols", line);
491
492                         if(c == '\n')
493                                 line++;
494                         sel = "01234567890.xb";
495                         if(ishex)
496                                 sel = "01234567890abcdefABCDEF";
497                         else if(isbin)
498                                 sel = "01";
499                         else if(isfloat)
500                                 sel = "01234567890eE-+";
501
502                         if(strchr(sel, c) == 0) {
503                                 unlexc(c);
504                                 break;
505                         }
506                         if(c == '.')
507                                 isfloat = 1;
508                         if(!isbin && c == 'x')
509                                 ishex = 1;
510                         if(!ishex && c == 'b')
511                                 isbin = 1;
512                         *p++ = c;
513                 }
514                 *p = '\0';
515                 if(isfloat) {
516                         yylval.fval = atof(symbol);
517                         return Tfconst;
518                 }
519
520                 if(isbin)
521                         yylval.ival = strtoull(symbol+2, 0, 2);
522                 else
523                         yylval.ival = strtoull(symbol, 0, 0);
524                 return Tconst;
525         }
526
527         for(;;) {
528                 c = lexc();
529                 if(c < 0)
530                         error("%d <eof> eating symbols", line);
531                 if(c == '\n')
532                         line++;
533                 if(c != '_' && c != '$' && c <= '~' && !isalnum(c)) {   /* checking against ~ lets UTF names through */
534                         unlexc(c);
535                         break;
536                 }
537                 *p++ = c;
538         }
539
540         *p = '\0';
541
542         s = look(symbol);
543         if(s == 0)
544                 s = enter(symbol, Tid);
545
546         yylval.sym = s;
547         return s->lexval;
548 }
549
550 Lsym*
551 enter(char *name, int t)
552 {
553         Lsym *s;
554         uint h;
555         char *p;
556         Value *v;
557
558         h = 0;
559         for(p = name; *p; p++)
560                 h = h*3 + *p;
561         h %= Hashsize;
562
563         s = gmalloc(sizeof(Lsym));
564         memset(s, 0, sizeof(Lsym));
565         s->name = strdup(name);
566
567         s->hash = hash[h];
568         hash[h] = s;
569         s->lexval = t;
570
571         v = gmalloc(sizeof(Value));
572         s->v = v;
573
574         v->fmt = 'X';
575         v->type = TINT;
576         memset(v, 0, sizeof(Value));
577
578         return s;
579 }
580
581 Lsym*
582 look(char *name)
583 {
584         Lsym *s;
585         uint h;
586         char *p;
587
588         h = 0;
589         for(p = name; *p; p++)
590                 h = h*3 + *p;
591         h %= Hashsize;
592
593         for(s = hash[h]; s; s = s->hash)
594                 if(strcmp(name, s->name) == 0)
595                         return s;
596         return 0;
597 }
598
599 Lsym*
600 mkvar(char *s)
601 {
602         Lsym *l;
603
604         l = look(s);
605         if(l == 0)
606                 l = enter(s, Tid);
607         return l;
608 }