git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/acid/lex.c
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / cmd / acid / lex.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6 #define Extern extern
7 #include "acid.h"
8 #include "y.tab.h"
9
10 struct keywd
11 {
12         char    *name;
13         int     terminal;
14 }
15 keywds[] =
16 {
17         "do",           Tdo,
18         "if",           Tif,
19         "then",         Tthen,
20         "else",         Telse,
21         "while",        Twhile,
22         "loop",         Tloop,
23         "head",         Thead,
24         "tail",         Ttail,
25         "append",       Tappend,
26         "defn",         Tfn,
27         "return",       Tret,
28         "local",        Tlocal,
29         "aggr",         Tcomplex,
30         "union",        Tcomplex,
31         "adt",          Tcomplex,
32         "complex",      Tcomplex,
33         "delete",       Tdelete,
34         "whatis",       Twhat,
35         "eval",         Teval,
36         "builtin",      Tbuiltin,
37         0,              0
38 };
39
/*
 * Escape translation table, indexed by the character that follows
 * a backslash.  Each entry holds the translated character plus one,
 * so that a zero entry can stand for "no translation" (escchar
 * subtracts the one again).
 */
char cmap[256] =
{
	['0']	= '\0'+1,
	['a']	= '\a'+1,
	['b']	= '\b'+1,
	['f']	= '\f'+1,
	['n']	= '\n'+1,
	['r']	= '\r'+1,
	['t']	= '\t'+1,
	['v']	= '\v'+1,
	['\\']	= '\\'+1,
	['"']	= '"'+1,
};
53
54 void
55 kinit(void)
56 {
57         int i;
58         
59         for(i = 0; keywds[i].name; i++) 
60                 enter(keywds[i].name, keywds[i].terminal);
61 }
62
63 typedef struct IOstack IOstack;
64 struct IOstack
65 {
66         char    *name;
67         int     line;
68         char    *text;
69         char    *ip;
70         Biobuf  *fin;
71         IOstack *prev;
72 };
73 IOstack *lexio;
74
75 void
76 pushfile(char *file)
77 {
78         Biobuf *b;
79         IOstack *io;
80
81         if(file)
82                 b = Bopen(file, OREAD);
83         else{
84                 b = Bopen("/fd/0", OREAD);
85                 file = "<stdin>";
86         }
87
88         if(b == 0)
89                 error("pushfile: %s: %r", file);
90
91         io = malloc(sizeof(IOstack));
92         if(io == 0)
93                 fatal("no memory");
94         io->name = strdup(file);
95         if(io->name == 0)
96                 fatal("no memory");
97         io->line = line;
98         line = 1;
99         io->text = 0;
100         io->fin = b;
101         io->prev = lexio;
102         lexio = io;
103 }
104
105 void
106 pushstr(Node *s)
107 {
108         IOstack *io;
109
110         io = malloc(sizeof(IOstack));
111         if(io == 0)
112                 fatal("no memory");
113         io->line = line;
114         line = 1;
115         io->name = strdup("<string>");
116         if(io->name == 0)
117                 fatal("no memory");
118         io->line = line;
119         line = 1;
120         io->text = strdup(s->string->string);
121         if(io->text == 0)
122                 fatal("no memory");
123         io->ip = io->text;
124         io->fin = 0;
125         io->prev = lexio;
126         lexio = io;
127 }
128
129 void
130 restartio(void)
131 {
132         Bflush(lexio->fin);
133         Binit(lexio->fin, 0, OREAD);
134 }
135
136 int
137 popio(void)
138 {
139         IOstack *s;
140
141         if(lexio == 0)
142                 return 0;
143
144         if(lexio->prev == 0){
145                 if(lexio->fin)
146                         restartio();
147                 return 0;
148         }
149
150         if(lexio->fin)
151                 Bterm(lexio->fin);
152         else
153                 free(lexio->text);
154         free(lexio->name);
155         line = lexio->line;
156         s = lexio;
157         lexio = s->prev;
158         free(s);
159         return 1;
160 }
161
162 int
163 Lfmt(Fmt *f)
164 {
165         int i;
166         char buf[1024];
167         IOstack *e;
168
169         e = lexio;
170         if(e) {
171                 i = snprint(buf, sizeof(buf), "%s:%d", e->name, line);
172                 while(e->prev) {
173                         e = e->prev;
174                         if(initialising && e->prev == 0)
175                                 break;
176                         i += snprint(buf+i, sizeof(buf)-i, " [%s:%d]", e->name, e->line);
177                 }
178         } else
179                 snprint(buf, sizeof(buf),  "no file:0");
180         fmtstrcpy(f, buf);
181         return 0;
182 }
183
184 void
185 unlexc(int s)
186 {
187         if(s == '\n')
188                 line--;
189
190         if(lexio->fin)
191                 Bungetc(lexio->fin);
192         else
193                 lexio->ip--;
194 }
195
196 int
197 lexc(void)
198 {
199         int c;
200
201         if(lexio->fin) {
202                 c = Bgetc(lexio->fin);
203                 if(gotint)
204                         error("interrupt");
205                 return c;
206         }
207
208         c = *lexio->ip++;
209         if(c == 0)
210                 return -1;
211         return c;
212 }
213
214 int
215 escchar(char c)
216 {
217         int n;
218         char buf[Strsize];
219
220         if(c >= '0' && c <= '9') {
221                 n = 1;
222                 buf[0] = c;
223                 for(;;) {
224                         c = lexc();
225                         if(c == Eof)
226                                 error("%d: <eof> in escape sequence", line);
227                         if(strchr("0123456789xX", c) == 0) {
228                                 unlexc(c);
229                                 break;
230                         }
231                         if(n >= Strsize)
232                                 error("string escape too long");
233                         buf[n++] = c;
234                 }
235                 buf[n] = '\0';
236                 return strtol(buf, 0, 0);
237         }
238
239         n = cmap[c];
240         if(n == 0)
241                 return c;
242         return n-1;
243 }
244
245 void
246 eatstring(void)
247 {
248         int esc, c, cnt;
249         char buf[Strsize];
250
251         esc = 0;
252         for(cnt = 0;;) {
253                 c = lexc();
254                 switch(c) {
255                 case Eof:
256                         error("%d: <eof> in string constant", line);
257
258                 case '\n':
259                         error("newline in string constant");
260                         goto done;
261
262                 case '\\':
263                         if(esc)
264                                 goto Default;
265                         esc = 1;
266                         break;
267
268                 case '"':
269                         if(esc == 0)
270                                 goto done;
271
272                         /* Fall through */
273                 default:
274                 Default:
275                         if(esc) {
276                                 c = escchar(c);
277                                 esc = 0;
278                         }
279                         buf[cnt++] = c;
280                         break;
281                 }
282                 if(cnt >= Strsize)
283                         error("string token too long");
284         }
285 done:
286         buf[cnt] = '\0';
287         yylval.string = strnode(buf);
288 }
289
290 void
291 eatnl(void)
292 {
293         int c;
294
295         line++;
296         for(;;) {
297                 c = lexc();
298                 if(c == Eof)
299                         error("eof in comment");
300                 if(c == '\n')
301                         return;
302         }
303 }
304
305 int
306 yylex(void)
307 {
308         int c;
309         extern char vfmt[];
310
311 loop:
312         Bflush(bout);
313         c = lexc();
314         switch(c) {
315         case Eof:
316                 if(gotint) {
317                         gotint = 0;
318                         stacked = 0;
319                         Bprint(bout, "\nacid: ");
320                         goto loop;
321                 }
322                 return Eof;
323
324         case '"':
325                 eatstring();
326                 return Tstring;
327
328         case ' ':
329         case '\t':
330                 goto loop;
331
332         case '\n':
333                 line++;
334                 if(interactive == 0)
335                         goto loop;
336                 if(stacked) {
337                         print("\t");
338                         goto loop;
339                 }
340                 return ';';
341
342         case '.':
343                 c = lexc();
344                 unlexc(c);
345                 if(isdigit(c))
346                         return numsym('.');
347
348                 return '.';
349  
350         case '(':
351         case ')':
352         case '[':
353         case ']':
354         case ';':
355         case ':':
356         case ',':
357         case '~':
358         case '?':
359         case '*':
360         case '@':
361         case '^':
362         case '%':
363                 return c;
364         case '{':
365                 stacked++;
366                 return c;
367         case '}':
368                 stacked--;
369                 return c;
370
371         case '\\':
372                 c = lexc();
373                 if(strchr(vfmt, c) == 0) {
374                         unlexc(c);
375                         return '\\';
376                 }
377                 yylval.ival = c;
378                 return Tfmt;
379
380         case '!':
381                 c = lexc();
382                 if(c == '=')
383                         return Tneq;
384                 unlexc(c);
385                 return '!';
386
387         case '+':
388                 c = lexc();
389                 if(c == '+')
390                         return Tinc;
391                 unlexc(c);
392                 return '+';
393
394         case '/':
395                 c = lexc();
396                 if(c == '/') {
397                         eatnl();
398                         goto loop;
399                 }
400                 unlexc(c);
401                 return '/';
402
403         case '\'':
404                 c = lexc();
405                 if(c == '\\')
406                         yylval.ival = escchar(lexc());
407                 else
408                         yylval.ival = c;
409                 c = lexc();
410                 if(c != '\'') {
411                         error("missing '");
412                         unlexc(c);
413                 }
414                 return Tconst;
415
416         case '&':
417                 c = lexc();
418                 if(c == '&')
419                         return Tandand;
420                 unlexc(c);
421                 return '&';
422
423         case '=':
424                 c = lexc();
425                 if(c == '=')
426                         return Teq;
427                 unlexc(c);
428                 return '=';
429
430         case '|':
431                 c = lexc();
432                 if(c == '|')
433                         return Toror;
434                 unlexc(c);
435                 return '|';
436
437         case '<':
438                 c = lexc();
439                 if(c == '=')
440                         return Tleq;
441                 if(c == '<')
442                         return Tlsh;
443                 unlexc(c);
444                 return '<';
445
446         case '>':
447                 c = lexc();
448                 if(c == '=')
449                         return Tgeq;
450                 if(c == '>')
451                         return Trsh;
452                 unlexc(c);
453                 return '>';
454
455         case '-':
456                 c = lexc();
457
458                 if(c == '>')
459                         return Tindir;
460
461                 if(c == '-')
462                         return Tdec;
463                 unlexc(c);
464                 return '-';
465
466         default:
467                 return numsym(c);
468         }
469 }
470
471 int
472 numsym(char first)
473 {
474         int c, isbin, isfloat, ishex;
475         char *sel, *p;
476         Lsym *s;
477
478         symbol[0] = first;
479         p = symbol;
480
481         ishex = 0;
482         isbin = 0;
483         isfloat = 0;
484         if(first == '.')
485                 isfloat = 1;
486
487         if(isdigit(*p++) || isfloat) {
488                 for(;;) {
489                         c = lexc();
490                         if(c < 0)
491                                 error("%d: <eof> eating symbols", line);
492
493                         if(c == '\n')
494                                 line++;
495                         sel = "01234567890.xb";
496                         if(ishex)
497                                 sel = "01234567890abcdefABCDEF";
498                         else if(isbin)
499                                 sel = "01";
500                         else if(isfloat)
501                                 sel = "01234567890eE-+";
502
503                         if(strchr(sel, c) == 0) {
504                                 unlexc(c);
505                                 break;
506                         }
507                         if(c == '.')
508                                 isfloat = 1;
509                         if(!isbin && c == 'x')
510                                 ishex = 1;
511                         if(!ishex && c == 'b')
512                                 isbin = 1;
513                         *p++ = c;
514                 }
515                 *p = '\0';
516                 if(isfloat) {
517                         yylval.fval = atof(symbol);
518                         return Tfconst;
519                 }
520
521                 if(isbin)
522                         yylval.ival = strtoull(symbol+2, 0, 2);
523                 else
524                         yylval.ival = strtoull(symbol, 0, 0);
525                 return Tconst;
526         }
527
528         for(;;) {
529                 c = lexc();
530                 if(c < 0)
531                         error("%d <eof> eating symbols", line);
532                 if(c == '\n')
533                         line++;
534                 if(c != '_' && c != '$' && c <= '~' && !isalnum(c)) {   /* checking against ~ lets UTF names through */
535                         unlexc(c);
536                         break;
537                 }
538                 *p++ = c;
539         }
540
541         *p = '\0';
542
543         s = look(symbol);
544         if(s == 0)
545                 s = enter(symbol, Tid);
546
547         yylval.sym = s;
548         return s->lexval;
549 }
550
551 Lsym*
552 enter(char *name, int t)
553 {
554         Lsym *s;
555         uint h;
556         char *p;
557         Value *v;
558
559         h = 0;
560         for(p = name; *p; p++)
561                 h = h*3 + *p;
562         h %= Hashsize;
563
564         s = gmalloc(sizeof(Lsym));
565         memset(s, 0, sizeof(Lsym));
566         s->name = strdup(name);
567
568         s->hash = hash[h];
569         hash[h] = s;
570         s->lexval = t;
571
572         v = gmalloc(sizeof(Value));
573         s->v = v;
574
575         v->fmt = 'X';
576         v->type = TINT;
577         memset(v, 0, sizeof(Value));
578
579         return s;
580 }
581
582 Lsym*
583 look(char *name)
584 {
585         Lsym *s;
586         uint h;
587         char *p;
588
589         h = 0;
590         for(p = name; *p; p++)
591                 h = h*3 + *p;
592         h %= Hashsize;
593
594         for(s = hash[h]; s; s = s->hash)
595                 if(strcmp(name, s->name) == 0)
596                         return s;
597         return 0;
598 }
599
600 Lsym*
601 mkvar(char *s)
602 {
603         Lsym *l;
604
605         l = look(s);
606         if(l == 0)
607                 l = enter(s, Tid);
608         return l;
609 }