]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/cc/lex.c
5419f26bf8305d0304deaad4bc21dfe73834e110
[plan9front.git] / sys / src / cmd / cc / lex.c
1 #include        "cc.h"
2 #include        "y.tab.h"
3
4 #ifndef CPP
5 #define CPP     "/bin/cpp"
6 #endif
7
8 /*
9  * known debug flags
10  * flags without a description may be exclusive to certain architectures
11  *      -.              Inhibit search for includes in source directory
12  *      -0
13  *      -4
14  *      -8
15  *      -<
16
17  *      -A              !B
18  *      -B              non ANSI
19  *      -C
20  *      -D name         define
21  *      -F              format specification check
22  *      -G
23  *      -H
24  *      -I path         include
25  *      -L              print every NAME symbol
26  *      -M              constant multiplication
27  *      -N
28  *      -P              peephole
29  *      -Q
30  *      -R              print registerization
31  *      -S              print assembly
32  *      -T
33  *      -V              enable void* conversion warnings
34  *      -W
35  *      -X              abort on error
36  *      -Y
37  *      -Z
38
39  *      -a              acid declaration output
40  *      -c
41  *      -d              print declarations
42  *      -e
43  *      -f
44  *      -g
45  *      -h
46  *      -i              print initialization
47  *      -l              generate little-endian code
48  *      -m              print add/sub/mul trees
49  *      -n              print acid to file (%.c=%.acid) (with -a or -aa)
50  *      -o file         output file
51  *      -p              use standard cpp ANSI preprocessor (not on windows)
52  *      -s              print structure offsets (with -a or -aa)
53  *      -t              print type trees
54  *      -v              verbose printing
55  *      -w              print warnings
56  *      -x
57  *      -y
58  */
59
60 void
61 main(int argc, char *argv[])
62 {
63         char *defs[50], *p;
64         int nproc, nout, status, i, c, ndef;
65
66         memset(debug, 0, sizeof(debug));
67         tinit();
68         cinit();
69         ginit();
70         arginit();
71
72         profileflg = 1; /* #pragma can turn it off */
73         tufield = simplet((1L<<tfield->etype) | BUNSIGNED);
74         ndef = 0;
75         outfile = 0;
76         include[ninclude++] = ".";
77         ARGBEGIN {
78         default:
79                 c = ARGC();
80                 if(c >= 0 && c < sizeof(debug))
81                         debug[c]++;
82                 break;
83
84         case 'l':                       /* for little-endian mips */
85                 if(thechar != 'v'){
86                         print("can only use -l with vc");
87                         errorexit();
88                 }
89                 thechar = '0';
90                 thestring = "spim";
91                 break;
92
93         case 'o':
94                 outfile = ARGF();
95                 break;
96
97         case 'D':
98                 p = ARGF();
99                 if(p) {
100                         defs[ndef++] = p;
101                         dodefine(p);
102                 }
103                 break;
104
105         case 'I':
106                 p = ARGF();
107                 if(p)
108                         setinclude(p);
109                 break;
110         } ARGEND
111         if(argc < 1 && outfile == 0) {
112                 print("usage: %cc [-options] files\n", thechar);
113                 errorexit();
114         }
115         if(argc > 1 && systemtype(Windows)){
116                 print("can't compile multiple files on windows\n");
117                 errorexit();
118         }
119         if(argc > 1 && !systemtype(Windows)) {
120                 nproc = 1;
121                 /*
122                  * if we're writing acid to standard output, don't compile
123                  * concurrently, to avoid interleaving output.
124                  */
125                 if(((!debug['a'] && !debug['Z']) || debug['n']) &&
126                     (p = getenv("NPROC")) != nil)
127                         nproc = atol(p);        /* */
128                 c = 0;
129                 nout = 0;
130                 for(;;) {
131                         while(nout < nproc && argc > 0) {
132                                 i = myfork();
133                                 if(i < 0) {
134                                         i = mywait(&status);
135                                         if(i < 0) {
136                                                 print("cannot create a process\n");
137                                                 errorexit();
138                                         }
139                                         if(status)
140                                                 c++;
141                                         nout--;
142                                         continue;
143                                 }
144                                 if(i == 0) {
145                                         fprint(2, "%s:\n", *argv);
146                                         if (compile(*argv, defs, ndef))
147                                                 errorexit();
148                                         exits(0);
149                                 }
150                                 nout++;
151                                 argc--;
152                                 argv++;
153                         }
154                         i = mywait(&status);
155                         if(i < 0) {
156                                 if(c)
157                                         errorexit();
158                                 exits(0);
159                         }
160                         if(status)
161                                 c++;
162                         nout--;
163                 }
164         }
165
166         if(argc == 0)
167                 c = compile("stdin", defs, ndef);
168         else
169                 c = compile(argv[0], defs, ndef);
170
171         if(c)
172                 errorexit();
173         exits(0);
174 }
175
176 int
177 compile(char *file, char **defs, int ndef)
178 {
179         char ofile[400], incfile[20];
180         char *p, *av[100], opt[256];
181         int i, c, fd[2];
182         static int first = 1;
183
184         strcpy(ofile, file);
185         p = utfrrune(ofile, pathchar());
186         if(p) {
187                 *p++ = 0;
188                 if(!debug['.'])
189                         include[0] = strdup(ofile);
190         } else
191                 p = ofile;
192
193         if(outfile == 0) {
194                 outfile = p;
195                 if(outfile) {
196                         if(p = utfrrune(outfile, '.'))
197                                 if(p[1] == 'c' && p[2] == 0)
198                                         p[0] = 0;
199                         p = utfrune(outfile, 0);
200                         if(debug['a'] && debug['n'])
201                                 strcat(p, ".acid");
202                         else if(debug['Z'] && debug['n'])
203                                 strcat(p, "_pickle.c");
204                         else {
205                                 p[0] = '.';
206                                 p[1] = thechar;
207                                 p[2] = 0;
208                         }
209                 } else
210                         outfile = "/dev/null";
211         }
212
213         if(p = getenv("INCLUDE")) {
214                 setinclude(p);
215         } else {
216                 if(systemtype(Plan9)) {
217                         sprint(incfile, "/%s/include", thestring);
218                         setinclude(strdup(incfile));
219                         setinclude("/sys/include");
220                 }
221         }
222         if (first)
223                 Binit(&diagbuf, 1, OWRITE);
224         /*
225          * if we're writing acid to standard output, don't keep scratching
226          * outbuf.
227          */
228         if((debug['a'] || debug['Z']) && !debug['n']) {
229                 if (first) {
230                         outfile = 0;
231                         Binit(&outbuf, dup(1, -1), OWRITE);
232                         dup(2, 1);
233                 }
234         } else {
235                 c = mycreat(outfile, 0664);
236                 if(c < 0) {
237                         diag(Z, "cannot open %s - %r", outfile);
238                         outfile = 0;
239                         errorexit();
240                 }
241                 Binit(&outbuf, c, OWRITE);
242         }
243         newio();
244         first = 0;
245
246         /* Use an ANSI preprocessor */
247         if(debug['p']) {
248                 if(systemtype(Windows)) {
249                         diag(Z, "-p option not supported on windows");
250                         errorexit();
251                 }
252                 if(myaccess(file) < 0) {
253                         diag(Z, "%s does not exist", file);
254                         errorexit();
255                 }
256                 if(mypipe(fd) < 0) {
257                         diag(Z, "pipe failed");
258                         errorexit();
259                 }
260                 switch(myfork()) {
261                 case -1:
262                         diag(Z, "fork failed");
263                         errorexit();
264                 case 0:
265                         close(fd[0]);
266                         mydup(fd[1], 1);
267                         close(fd[1]);
268                         av[0] = CPP;
269                         i = 1;
270                         sprint(opt, "-+");
271                         av[i++] = strdup(opt);
272                         if(debug['.']){
273                                 sprint(opt, "-.");
274                                 av[i++] = strdup(opt);
275                         }
276                         for(c = 0; c < ndef; c++) {
277                                 sprint(opt, "-D%s", defs[c]);
278                                 av[i++] = strdup(opt);
279                         }
280                         for(c = 0; c < ninclude; c++) {
281                                 sprint(opt, "-I%s", include[c]);
282                                 av[i++] = strdup(opt);
283                         }
284                         if(strcmp(file, "stdin") != 0)
285                                 av[i++] = file;
286                         av[i] = 0;
287                         if(debug['p'] > 1) {
288                                 for(c = 0; c < i; c++)
289                                         fprint(2, "%s ", av[c]);
290                                 fprint(2, "\n");
291                         }
292                         myexec(av[0], av);
293                         fprint(2, "can't exec C preprocessor %s: %r\n", CPP);
294                         errorexit();
295                 default:
296                         close(fd[1]);
297                         newfile(file, fd[0]);
298                         break;
299                 }
300         } else {
301                 if(strcmp(file, "stdin") == 0)
302                         newfile(file, 0);
303                 else
304                         newfile(file, -1);
305         }
306         yyparse();
307         if(!debug['a'] && !debug['Z'])
308                 gclean();
309         return nerrors;
310 }
311
312 void
313 errorexit(void)
314 {
315         if(outfile)
316                 remove(outfile);
317         exits("error");
318 }
319
320 void
321 pushio(void)
322 {
323         Io *i;
324
325         i = iostack;
326         if(i == I) {
327                 yyerror("botch in pushio");
328                 errorexit();
329         }
330         i->p = fi.p;
331         i->c = fi.c;
332 }
333
334 void
335 newio(void)
336 {
337         Io *i;
338         static int pushdepth = 0;
339
340         i = iofree;
341         if(i == I) {
342                 pushdepth++;
343                 if(pushdepth > 1000) {
344                         yyerror("macro/io expansion too deep");
345                         errorexit();
346                 }
347                 i = alloc(sizeof(*i));
348         } else
349                 iofree = i->link;
350         i->c = 0;
351         i->f = -1;
352         ionext = i;
353 }
354
355 void
356 newfile(char *s, int f)
357 {
358         Io *i;
359
360         if(debug['e'])
361                 print("%L: %s\n", lineno, s);
362
363         i = ionext;
364         i->link = iostack;
365         iostack = i;
366         i->f = f;
367         if(f < 0)
368                 i->f = open(s, 0);
369         if(i->f < 0) {
370                 yyerror("%cc: %r: %s", thechar, s);
371                 errorexit();
372         }
373         fi.c = 0;
374         linehist(s, 0);
375 }
376
377 Sym*
378 slookup(char *s)
379 {
380
381         strcpy(symb, s);
382         return lookup();
383 }
384
385 Sym*
386 lookup(void)
387 {
388         Sym *s;
389         ulong h;
390         char *p;
391         int c, n;
392
393         h = 0;
394         for(p=symb; *p;) {
395                 h = h * 3;
396                 h += *p++;
397         }
398         n = (p - symb) + 1;
399         if((long)h < 0)
400                 h = ~h;
401         h %= NHASH;
402         c = symb[0];
403         for(s = hash[h]; s != S; s = s->link) {
404                 if(s->name[0] != c)
405                         continue;
406                 if(strcmp(s->name, symb) == 0)
407                         return s;
408         }
409         s = alloc(sizeof(*s));
410         s->name = alloc(n);
411         memmove(s->name, symb, n);
412
413         strcpy(s->name, symb);
414         s->link = hash[h];
415         hash[h] = s;
416         syminit(s);
417
418         return s;
419 }
420
421 void
422 syminit(Sym *s)
423 {
424         s->lexical = LNAME;
425         s->block = 0;
426         s->offset = 0;
427         s->type = T;
428         s->suetag = T;
429         s->class = CXXX;
430         s->aused = 0;
431         s->sig = SIGNONE;
432 }
433
/* end-of-input character value, as tested by yylex and getc */
#define EOF     (-1)
/* value of peekc when no character has been pushed back */
#define IGN     (-2)
/* flag bit tested on the value escchar returns while scanning "..." */
#define ESC     (1<<20)
/*
 * next input byte (0..255) from the current buffer; when the buffer
 * count runs out, the result of filbuf() is used instead
 */
#define GETC()  ((--fi.c < 0)? filbuf(): (*fi.p++ & 0xff))

/*
 * flag bits collected in c1 by yylex while scanning a numeric constant
 */
enum
{
        Numdec          = 1<<0,         /* constant does not start with 0 */
        Numlong         = 1<<1,         /* L/l suffix seen */
        Numuns          = 1<<2,         /* U/u suffix seen */
        Numvlong        = 1<<3,         /* second L/l suffix seen */
        Numflt          = 1<<4,         /* F/f suffix seen on a floating constant */
};
447
/*
 * Lexical analyzer: return the next token and leave its value in
 * yylval.
 *
 * Single characters are returned as themselves, multi-character
 * operators, constants and names as L* token codes, and -1 at end of
 * input.  peekc holds at most one character of lookahead that has
 * been read but not consumed (IGN when there is none).  Names and
 * numbers are collected in symb; a token that does not fit is fatal.
 * A name that is a macro is expanded and the expansion becomes the
 * current input.
 */
long
yylex(void)
{
        vlong vv;
        long c, c1, t;
        char *cp;
        Rune rune;
        Sym *s;

        /* start with the pushed-back character, if there is one */
        if(peekc != IGN) {
                c = peekc;
                peekc = IGN;
                goto l1;
        }
l0:
        /* l0: fetch a fresh character; l1: classify the character in c */
        c = GETC();

l1:
        if(c >= Runeself) {
                /*
                 * extension --
                 *      all multibyte runes are alpha
                 */
                cp = symb;
                goto talph;
        }
        if(isspace(c)) {
                if(c == '\n')
                        lineno++;
                goto l0;
        }
        if(isalpha(c)) {
                cp = symb;
                /* only 'L' can introduce a rune character or string constant */
                if(c != 'L')
                        goto talph;
                *cp++ = c;
                c = GETC();
                if(c == '\'') {
                        /* L'x' */
                        c = escchar('\'', 1, 0);
                        if(c == EOF)
                                c = '\'';
                        /* the next call is expected to hit the closing quote */
                        c1 = escchar('\'', 1, 0);
                        if(c1 != EOF) {
                                yyerror("missing '");
                                peekc = c1;
                        }
                        yylval.vval = convvtox(c, TRUNE);
                        return LUCONST;
                }
                if(c == '"') {
                        goto caselq;
                }
                /* plain identifier starting with L; the L is already in symb */
                goto talph;
        }
        if(isdigit(c))
                goto tnum;
        switch(c)
        {

        case EOF:
                /* keep EOF pushed back so later calls see it again */
                peekc = EOF;
                return -1;

        case '_':
                cp = symb;
                goto talph;

        case '#':
                domacro();
                goto l0;

        case '.':
                /* a '.' followed by a digit starts a floating constant */
                c1 = GETC();
                if(isdigit(c1)) {
                        cp = symb;
                        *cp++ = c;
                        c = c1;
                        c1 = 0;         /* c1 now holds the Num* flags */
                        goto casedot;
                }
                break;

        case '"':
                strcpy(symb, "\"<string>\"");
                cp = alloc(0);
                c1 = 0;         /* bytes stored so far */

                /* "..." */
                for(;;) {
                        c = escchar('"', 0, 1);
                        if(c == EOF)
                                break;
                        if(c & ESC) {
                                /* flagged value: store as one byte */
                                cp = allocn(cp, c1, 1);
                                cp[c1++] = c;
                        } else {
                                /* otherwise store the rune's multi-byte encoding */
                                rune = c;
                                c = runelen(rune);
                                cp = allocn(cp, c1, c);
                                runetochar(cp+c1, &rune);
                                c1 += c;
                        }
                }
                /* recorded length excludes the padding added below */
                yylval.sval.l = c1;
                /* append at least one NUL, then more until c1 & MAXALIGN is zero */
                do {
                        cp = allocn(cp, c1, 1);
                        cp[c1++] = 0;
                } while(c1 & MAXALIGN);
                yylval.sval.s = cp;
                return LSTRING;

        caselq:
                /* L"..." */
                strcpy(symb, "\"L<string>\"");
                cp = alloc(0);
                c1 = 0;         /* bytes stored so far */
                for(;;) {
                        c = escchar('"', 1, 0);
                        if(c == EOF)
                                break;
                        /* every element is stored as a whole Rune */
                        cp = allocn(cp, c1, sizeof(Rune));
                        *(Rune*)(cp + c1) = c;
                        c1 += sizeof(Rune);
                }
                yylval.sval.l = c1;
                /* append at least one zero Rune, then more until c1 & MAXALIGN is zero */
                do {
                        cp = allocn(cp, c1, sizeof(Rune));
                        *(Rune*)(cp + c1) = 0;
                        c1 += sizeof(Rune);
                } while(c1 & MAXALIGN);
                yylval.sval.s = cp;
                return LLSTRING;

        case '\'':
                /* '.' */
                c = escchar('\'', 0, 0);
                if(c == EOF)
                        c = '\'';
                /* the next call is expected to hit the closing quote */
                c1 = escchar('\'', 0, 0);
                if(c1 != EOF) {
                        yyerror("missing '");
                        peekc = c1;
                }
                vv = c;
                /* a value changed by conversion to TUCHAR does not fit */
                yylval.vval = convvtox(vv, TUCHAR);
                if(yylval.vval != vv)
                        yyerror("overflow in character constant: 0x%lx", c);
                else
                if(c & 0x80){
                        nearln = lineno;
                        warn(Z, "sign-extended character constant");
                }
                yylval.vval = convvtox(vv, TCHAR);
                return LCONST;

        case '/':
                c1 = GETC();
                if(c1 == '*') {
                        /* block comment: skip to the closing star-slash */
                        for(;;) {
                                c = getr();
                                while(c == '*') {
                                        c = getr();
                                        if(c == '/')
                                                goto l0;
                                }
                                if(c == EOF) {
                                        yyerror("eof in comment");
                                        errorexit();
                                }
                        }
                }
                if(c1 == '/') {
                        /* line comment: skip to the end of the line */
                        for(;;) {
                                c = getr();
                                if(c == '\n')
                                        goto l0;
                                if(c == EOF) {
                                        yyerror("eof in comment");
                                        errorexit();
                                }
                        }
                }
                if(c1 == '=')
                        return LDVE;
                break;

        /*
         * operators: read one more character; if it completes a longer
         * operator return that token, otherwise break so the extra
         * character is pushed back after the switch.
         */
        case '*':
                c1 = GETC();
                if(c1 == '=')
                        return LMLE;
                break;

        case '%':
                c1 = GETC();
                if(c1 == '=')
                        return LMDE;
                break;

        case '+':
                c1 = GETC();
                if(c1 == '+')
                        return LPP;
                if(c1 == '=')
                        return LPE;
                break;

        case '-':
                c1 = GETC();
                if(c1 == '-')
                        return LMM;
                if(c1 == '=')
                        return LME;
                if(c1 == '>')
                        return LMG;
                break;

        case '>':
                c1 = GETC();
                if(c1 == '>') {
                        /* >> seen: the token is LRSH unless '=' follows */
                        c = LRSH;
                        c1 = GETC();
                        if(c1 == '=')
                                return LRSHE;
                        break;
                }
                if(c1 == '=')
                        return LGE;
                break;

        case '<':
                c1 = GETC();
                if(c1 == '<') {
                        /* << seen: the token is LLSH unless '=' follows */
                        c = LLSH;
                        c1 = GETC();
                        if(c1 == '=')
                                return LLSHE;
                        break;
                }
                if(c1 == '=')
                        return LLE;
                break;

        case '=':
                c1 = GETC();
                if(c1 == '=')
                        return LEQ;
                break;

        case '!':
                c1 = GETC();
                if(c1 == '=')
                        return LNE;
                break;

        case '&':
                c1 = GETC();
                if(c1 == '&')
                        return LANDAND;
                if(c1 == '=')
                        return LANDE;
                break;

        case '|':
                c1 = GETC();
                if(c1 == '|')
                        return LOROR;
                if(c1 == '=')
                        return LORE;
                break;

        case '^':
                c1 = GETC();
                if(c1 == '=')
                        return LXORE;
                break;

        default:
                /* any other character is its own token; nothing was read ahead */
                return c;
        }
        /* the character read ahead was not part of the token: push it back */
        peekc = c1;
        return c;

talph:
        /*
         * cp is set to symb and some
         * prefix has been stored
         */
        for(;;) {
                /* leave room for one more full rune and the NUL */
                if(cp >= &symb[NSYMB-UTFmax-1])
                        goto toolong;
                if(c >= Runeself) {
                        /* copy the bytes of one multibyte rune */
                        for(c1=0;;) {
                                cp[c1++] = c;
                                if(fullrune(cp, c1))
                                        break;
                                c = GETC();
                        }
                        cp += c1;
                        c = GETC();
                        continue;
                }
                if(!isalnum(c) && c != '_')
                        break;
                *cp++ = c;
                c = GETC();
        }
        *cp = 0;
        if(debug['L'])
                print("%L: %s\n", lineno, symb);
        /* the character that ended the name is not consumed */
        peekc = c;
        s = lookup();
        if(s->macro) {
                /*
                 * expand the macro into a fresh Io buffer, save the
                 * current input position and make the expansion the
                 * current input.
                 */
                newio();
                cp = ionext->b;
                macexpand(s, cp);
                pushio();
                ionext->link = iostack;
                iostack = ionext;
                fi.p = cp;
                fi.c = strlen(cp);
                /* append the pushed-back character so it is read after the expansion */
                if(peekc != IGN) {
                        cp[fi.c++] = peekc;
                        cp[fi.c] = 0;
                        peekc = IGN;
                }
                goto l0;
        }
        yylval.sym = s;
        if(s->class == CTYPEDEF || s->class == CTYPESTR)
                return LTYPE;
        return s->lexical;

tnum:
        /* numeric constant: c is the first digit, c1 collects Num* flags */
        c1 = 0;
        cp = symb;
        if(c != '0') {
                /* decimal digits */
                c1 |= Numdec;
                for(;;) {
                        if(cp >= &symb[NSYMB-1])
                                goto toolong;
                        *cp++ = c;
                        c = GETC();
                        if(isdigit(c))
                                continue;
                        goto dc;
                }
        }
        *cp++ = c;
        c = GETC();
        if(c == 'x' || c == 'X')
                /* hexadecimal: store the x, then the hex digits */
                for(;;) {
                        if(cp >= &symb[NSYMB-1])
                                goto toolong;
                        *cp++ = c;
                        c = GETC();
                        if(isdigit(c))
                                continue;
                        if(c >= 'a' && c <= 'f')
                                continue;
                        if(c >= 'A' && c <= 'F')
                                continue;
                        /* only "0x" stored: no digits followed */
                        if(cp == symb+2)
                                yyerror("malformed hex constant");
                        goto ncu;
                }
        if(c < '0' || c > '7')
                goto dc;
        /* octal digits */
        for(;;) {
                if(c >= '0' && c <= '7') {
                        if(cp >= &symb[NSYMB-1])
                                goto toolong;
                        *cp++ = c;
                        c = GETC();
                        continue;
                }
                goto ncu;
        }

dc:
        /* a '.' or exponent after the digits makes it a floating constant */
        if(c == '.')
                goto casedot;
        if(c == 'e' || c == 'E')
                goto casee;

ncu:
        /* integer suffixes: at most one U/u and up to two L/l */
        if((c == 'U' || c == 'u') && !(c1 & Numuns)) {
                c = GETC();
                c1 |= Numuns;
                goto ncu;
        }
        if((c == 'L' || c == 'l') && !(c1 & Numvlong)) {
                c = GETC();
                if(c1 & Numlong)
                        c1 |= Numvlong;
                c1 |= Numlong;
                goto ncu;
        }
        *cp = 0;
        peekc = c;
        if(mpatov(symb, &yylval.vval))
                yyerror("overflow in constant");

        /*
         * choose token and type from the suffixes; the unsigned variant
         * is used when U was given or when the value converted to the
         * signed type is negative.
         */
        vv = yylval.vval;
        if(c1 & Numvlong) {
                if((c1 & Numuns) || convvtox(vv, TVLONG) < 0) {
                        c = LUVLCONST;
                        t = TUVLONG;
                        goto nret;
                }
                c = LVLCONST;
                t = TVLONG;
                goto nret;
        }
        if(c1 & Numlong) {
                if((c1 & Numuns) || convvtox(vv, TLONG) < 0) {
                        c = LULCONST;
                        t = TULONG;
                        goto nret;
                }
                c = LLCONST;
                t = TLONG;
                goto nret;
        }
        if((c1 & Numuns) || convvtox(vv, TINT) < 0) {
                c = LUCONST;
                t = TUINT;
                goto nret;
        }
        c = LCONST;
        t = TINT;
        goto nret;

nret:
        /* warn when conversion to the chosen type changes the value */
        yylval.vval = convvtox(vv, t);
        if(yylval.vval != vv){
                nearln = lineno;
                warn(Z, "truncated constant: %T %s", types[t], symb);
        }
        return c;

casedot:
        /* store the character in c ('.' or the first fraction digit), then following digits */
        for(;;) {
                if(cp >= &symb[NSYMB-1])
                        goto toolong;
                *cp++ = c;
                c = GETC();
                if(!isdigit(c))
                        break;
        }
        if(c != 'e' && c != 'E')
                goto caseout;

casee:
        /* exponent: room is checked for the 'e' and an optional sign */
        if(cp >= &symb[NSYMB-2])
                goto toolong;
        *cp++ = 'e';
        c = GETC();
        if(c == '+' || c == '-') {
                *cp++ = c;
                c = GETC();
        }
        if(!isdigit(c))
                yyerror("malformed fp constant exponent");
        while(isdigit(c)) {
                if(cp >= &symb[NSYMB-1])
                        goto toolong;
                *cp++ = c;
                c = GETC();
        }

caseout:
        /* floating suffix: L/l or F/f; only F/f changes the token returned */
        if(c == 'L' || c == 'l') {
                c = GETC();
                c1 |= Numlong;
        } else
        if(c == 'F' || c == 'f') {
                c = GETC();
                c1 |= Numflt;
        }
        *cp = 0;
        peekc = c;
        yylval.dval = strtod(symb, nil);
        if(isInf(yylval.dval, 1) || isInf(yylval.dval, -1)) {
                yyerror("overflow in float constant");
                yylval.dval = 0;
        }
        if(c1 & Numflt)
                return LFCONST;
        return LDCONST;

toolong:
        /* symb is full: report what was collected so far and give up */
        yyerror("token too long: %.*s...", utfnlen(symb, cp-symb), symb);
        errorexit();
        return -1;
}
944
/*
 * convert a string, s, to vlong in *v.
 * required syntax is [0[x]]d*: a leading 0 selects octal, a leading
 * 0x or 0X selects hexadecimal, anything else is decimal.
 *
 * returns 0 on success.  returns 1 and sets *v to ~0 when s contains
 * a character that is not a digit of the selected base, or when the
 * value does not fit in 64 bits.  values with the top bit set are
 * accepted and stored as the corresponding negative vlong.
 */
int
mpatov(char *s, vlong *v)
{
        unsigned long long n;
        int c, d, base;

        n = 0;
        base = 10;
        if(*s == '0') {
                s++;
                base = 8;
                if(*s == 'x' || *s == 'X') {
                        s++;
                        base = 16;
                }
        }
        while((c = *s++) != 0) {
                if(c >= '0' && c <= '9')
                        d = c - '0';
                else
                if(c >= 'a' && c <= 'f')
                        d = c - 'a' + 10;
                else
                if(c >= 'A' && c <= 'F')
                        d = c - 'A' + 10;
                else
                        goto bad;
                /* digit must belong to the base: no 8 or 9 in octal, no letters in decimal */
                if(d >= base)
                        goto bad;
                /* exact test that n*base + d still fits, made before multiplying */
                if(n > (~0ULL - d) / base)
                        goto bad;
                n = n*base + d;
        }
        *v = n;
        return 0;

bad:
        *v = ~0;
        return 1;
}
1013
1014 int
1015 getc(void)
1016 {
1017         int c;
1018
1019         if(peekc != IGN) {
1020                 c = peekc;
1021                 peekc = IGN;
1022         } else
1023                 c = GETC();
1024         if(c == '\n')
1025                 lineno++;
1026         if(c == EOF) {
1027                 yyerror("End of file");
1028                 errorexit();
1029         }
1030         return c;
1031 }
1032
1033 long
1034 getr(void)
1035 {
1036         int c, i;
1037         char str[UTFmax+1];
1038         Rune rune;
1039
1040
1041         c = getc();
1042         if(c < Runeself)
1043                 return c;
1044         i = 0;
1045         str[i++] = c;
1046
1047 loop:
1048         c = getc();
1049         str[i++] = c;
1050         if(!fullrune(str, i))
1051                 goto loop;
1052         c = chartorune(&rune, str);
1053         if(rune == Runeerror && c == 1) {
1054                 nearln = lineno;
1055                 diag(Z, "illegal rune in string");
1056                 for(c=0; c<i; c++)
1057                         print(" %.2x", *(uchar*)(str+c));
1058                 print("\n");
1059         }
1060         return rune;
1061 }
1062
1063 int
1064 getnsc(void)
1065 {
1066         int c;
1067
1068         if(peekc != IGN) {
1069                 c = peekc;
1070                 peekc = IGN;
1071         } else
1072                 c = GETC();
1073         for(;;) {
1074                 if(c >= Runeself || !isspace(c))
1075                         return c;
1076                 if(c == '\n') {
1077                         lineno++;
1078                         return c;
1079                 }
1080                 c = GETC();
1081         }
1082 }
1083
1084 void
1085 unget(int c)
1086 {
1087
1088         peekc = c;
1089         if(c == '\n')
1090                 lineno--;
1091 }
1092
1093 long
1094 escchar(long e, int longflg, int escflg)
1095 {
1096         long c, l;
1097         int i;
1098
1099 loop:
1100         c = getr();
1101         if(c == '\n') {
1102                 yyerror("newline in string");
1103                 return EOF;
1104         }
1105         if(c != '\\') {
1106                 if(c == e)
1107                         c = EOF;
1108                 return c;
1109         }
1110         c = getr();
1111         if(c == 'x') {
1112                 /*
1113                  * note this is not ansi,
1114                  * supposed to only accept 2 hex
1115                  */
1116                 i = 2;
1117                 if(longflg)
1118                         i = 6;
1119                 l = 0;
1120                 for(; i>0; i--) {
1121                         c = getc();
1122                         if(c >= '0' && c <= '9') {
1123                                 l = l*16 + c-'0';
1124                                 continue;
1125                         }
1126                         if(c >= 'a' && c <= 'f') {
1127                                 l = l*16 + c-'a' + 10;
1128                                 continue;
1129                         }
1130                         if(c >= 'A' && c <= 'F') {
1131                                 l = l*16 + c-'A' + 10;
1132                                 continue;
1133                         }
1134                         unget(c);
1135                         break;
1136                 }
1137                 if(escflg)
1138                         l |= ESC;
1139                 return l;
1140         }
1141         if(c >= '0' && c <= '7') {
1142                 /*
1143                  * note this is not ansi,
1144                  * supposed to only accept 3 oct
1145                  */
1146                 i = 2;
1147                 if(longflg)
1148                         i = 6;
1149                 l = c - '0';
1150                 for(; i>0; i--) {
1151                         c = getc();
1152                         if(c >= '0' && c <= '7') {
1153                                 l = l*8 + c-'0';
1154                                 continue;
1155                         }
1156                         unget(c);
1157                 }
1158                 if(escflg)
1159                         l |= ESC;
1160                 return l;
1161         }
1162         switch(c)
1163         {
1164         case '\n':      goto loop;
1165         case 'n':       return '\n';
1166         case 't':       return '\t';
1167         case 'b':       return '\b';
1168         case 'r':       return '\r';
1169         case 'f':       return '\f';
1170         case 'a':       return '\a';
1171         case 'v':       return '\v';
1172         }
1173         return c;
1174 }
1175
1176 struct
1177 {
1178         char    *name;
1179         ushort  lexical;
1180         ushort  type;
1181 } itab[] =
1182 {
1183         "auto",         LAUTO,          0,
1184         "break",        LBREAK,         0,
1185         "case",         LCASE,          0,
1186         "char",         LCHAR,          TCHAR,
1187         "const",        LCONSTNT,       0,
1188         "continue",     LCONTINUE,      0,
1189         "default",      LDEFAULT,       0,
1190         "do",           LDO,            0,
1191         "double",       LDOUBLE,        TDOUBLE,
1192         "else",         LELSE,          0,
1193         "enum",         LENUM,          0,
1194         "extern",       LEXTERN,        0,
1195         "float",        LFLOAT,         TFLOAT,
1196         "for",          LFOR,           0,
1197         "goto",         LGOTO,          0,
1198         "if",           LIF,            0,
1199         "inline",       LINLINE,        0,
1200         "int",          LINT,           TINT,
1201         "long",         LLONG,          TLONG,
1202         "register",     LREGISTER,      0,
1203         "restrict",     LRESTRICT,      0,
1204         "return",       LRETURN,        0,
1205         "SET",          LSET,           0,
1206         "short",        LSHORT,         TSHORT,
1207         "signed",       LSIGNED,        0,
1208         "signof",       LSIGNOF,        0,
1209         "sizeof",       LSIZEOF,        0,
1210         "static",       LSTATIC,        0,
1211         "struct",       LSTRUCT,        0,
1212         "switch",       LSWITCH,        0,
1213         "typedef",      LTYPEDEF,       0,
1214         "typestr",      LTYPESTR,       0,
1215         "union",        LUNION,         0,
1216         "unsigned",     LUNSIGNED,      0,
1217         "USED",         LUSED,          0,
1218         "void",         LVOID,          TVOID,
1219         "volatile",     LVOLATILE,      0,
1220         "while",        LWHILE,         0,
1221         0
1222 };
1223
1224 void
1225 cinit(void)
1226 {
1227         Sym *s;
1228         int i;
1229         Type *t;
1230
1231         nerrors = 0;
1232         lineno = 1;
1233         iostack = I;
1234         iofree = I;
1235         peekc = IGN;
1236         nhunk = 0;
1237
1238         types[TXXX] = T;
1239         types[TCHAR] = typ(TCHAR, T);
1240         types[TUCHAR] = typ(TUCHAR, T);
1241         types[TSHORT] = typ(TSHORT, T);
1242         types[TUSHORT] = typ(TUSHORT, T);
1243         types[TINT] = typ(TINT, T);
1244         types[TUINT] = typ(TUINT, T);
1245         types[TLONG] = typ(TLONG, T);
1246         types[TULONG] = typ(TULONG, T);
1247         types[TVLONG] = typ(TVLONG, T);
1248         types[TUVLONG] = typ(TUVLONG, T);
1249         types[TFLOAT] = typ(TFLOAT, T);
1250         types[TDOUBLE] = typ(TDOUBLE, T);
1251         types[TVOID] = typ(TVOID, T);
1252         types[TENUM] = typ(TENUM, T);
1253         types[TFUNC] = typ(TFUNC, types[TINT]);
1254         types[TIND] = typ(TIND, types[TVOID]);
1255
1256         for(i=0; i<NHASH; i++)
1257                 hash[i] = S;
1258         for(i=0; itab[i].name; i++) {
1259                 s = slookup(itab[i].name);
1260                 s->lexical = itab[i].lexical;
1261                 if(itab[i].type != 0)
1262                         s->type = types[itab[i].type];
1263         }
1264         blockno = 0;
1265         autobn = 0;
1266         autoffset = 0;
1267
1268         t = typ(TARRAY, types[TCHAR]);
1269         t->width = 0;
1270         symstring = slookup(".string");
1271         symstring->class = CSTATIC;
1272         symstring->type = t;
1273
1274         t = typ(TARRAY, types[TCHAR]);
1275         t->width = 0;
1276
1277         nodproto = new(OPROTO, Z, Z);
1278         dclstack = D;
1279
1280         pathname = allocn(pathname, 0, 100);
1281         if(mygetwd(pathname, 99) == 0) {
1282                 pathname = allocn(pathname, 100, 900);
1283                 if(mygetwd(pathname, 999) == 0)
1284                         strcpy(pathname, "/???");
1285         }
1286
1287         fmtinstall('O', Oconv);
1288         fmtinstall('T', Tconv);
1289         fmtinstall('F', FNconv);
1290         fmtinstall('L', Lconv);
1291         fmtinstall('Q', Qconv);
1292         fmtinstall('|', VBconv);
1293 }
1294
1295 int
1296 filbuf(void)
1297 {
1298         Io *i;
1299
1300 loop:
1301         i = iostack;
1302         if(i == I)
1303                 return EOF;
1304         if(i->f < 0)
1305                 goto pop;
1306         fi.c = read(i->f, i->b, BUFSIZ) - 1;
1307         if(fi.c < 0) {
1308                 close(i->f);
1309                 linehist(0, 0);
1310                 goto pop;
1311         }
1312         fi.p = i->b + 1;
1313         return i->b[0] & 0xff;
1314
1315 pop:
1316         iostack = i->link;
1317         i->link = iofree;
1318         iofree = i;
1319         i = iostack;
1320         if(i == I)
1321                 return EOF;
1322         fi.p = i->p;
1323         fi.c = i->c;
1324         if(--fi.c < 0)
1325                 goto loop;
1326         return *fi.p++ & 0xff;
1327 }
1328
1329 int
1330 Oconv(Fmt *fp)
1331 {
1332         int a;
1333
1334         a = va_arg(fp->args, int);
1335         if(a < OXXX || a > OEND)
1336                 return fmtprint(fp, "***badO %d***", a);
1337
1338         return fmtstrcpy(fp, onames[a]);
1339 }
1340
1341 int
1342 Lconv(Fmt *fp)
1343 {
1344         char str[STRINGSZ], s[STRINGSZ];
1345         Hist *h;
1346         struct
1347         {
1348                 Hist*   incl;   /* start of this include file */
1349                 long    idel;   /* delta line number to apply to include */
1350                 Hist*   line;   /* start of this #line directive */
1351                 long    ldel;   /* delta line number to apply to #line */
1352         } a[HISTSZ];
1353         long l, d;
1354         int i, n;
1355
1356         l = va_arg(fp->args, long);
1357         n = 0;
1358         for(h = hist; h != H; h = h->link) {
1359                 if(l < h->line)
1360                         break;
1361                 if(h->name) {
1362                         if(h->offset != 0) {            /* #line directive, not #pragma */
1363                                 if(n > 0 && n < HISTSZ && h->offset >= 0) {
1364                                         a[n-1].line = h;
1365                                         a[n-1].ldel = h->line - h->offset + 1;
1366                                 }
1367                         } else {
1368                                 if(n < HISTSZ) {        /* beginning of file */
1369                                         a[n].incl = h;
1370                                         a[n].idel = h->line;
1371                                         a[n].line = 0;
1372                                 }
1373                                 n++;
1374                         }
1375                         continue;
1376                 }
1377                 n--;
1378                 if(n > 0 && n < HISTSZ) {
1379                         d = h->line - a[n].incl->line;
1380                         a[n-1].ldel += d;
1381                         a[n-1].idel += d;
1382                 }
1383         }
1384         if(n > HISTSZ)
1385                 n = HISTSZ;
1386         str[0] = 0;
1387         for(i=n-1; i>=0; i--) {
1388                 if(i != n-1) {
1389                         if(fp->flags & ~(FmtWidth|FmtPrec))     /* BUG ROB - was f3 */
1390                                 break;
1391                         strcat(str, " ");
1392                 }
1393                 if(a[i].line)
1394                         snprint(s, STRINGSZ, "%s:%ld[%s:%ld]",
1395                                 a[i].line->name, l-a[i].ldel+1,
1396                                 a[i].incl->name, l-a[i].idel+1);
1397                 else
1398                         snprint(s, STRINGSZ, "%s:%ld",
1399                                 a[i].incl->name, l-a[i].idel+1);
1400                 if(strlen(s)+strlen(str) >= STRINGSZ-10)
1401                         break;
1402                 strcat(str, s);
1403                 l = a[i].incl->line - 1;        /* now print out start of this file */
1404         }
1405         if(n == 0)
1406                 strcat(str, "<eof>");
1407         return fmtstrcpy(fp, str);
1408 }
1409
1410 int
1411 Tconv(Fmt *fp)
1412 {
1413         char str[STRINGSZ+20], s[STRINGSZ+20];
1414         Type *t, *t1;
1415         int et;
1416         long n;
1417
1418         str[0] = 0;
1419         for(t = va_arg(fp->args, Type*); t != T; t = t->link) {
1420                 et = t->etype;
1421                 if(str[0])
1422                         strcat(str, " ");
1423                 if(t->garb&~GINCOMPLETE) {
1424                         sprint(s, "%s ", gnames[t->garb&~GINCOMPLETE]);
1425                         if(strlen(str) + strlen(s) < STRINGSZ)
1426                                 strcat(str, s);
1427                 }
1428                 sprint(s, "%s", tnames[et]);
1429                 if(strlen(str) + strlen(s) < STRINGSZ)
1430                         strcat(str, s);
1431                 if(et == TFUNC && (t1 = t->down)) {
1432                         sprint(s, "(%T", t1);
1433                         if(strlen(str) + strlen(s) < STRINGSZ)
1434                                 strcat(str, s);
1435                         while(t1 = t1->down) {
1436                                 sprint(s, ", %T", t1);
1437                                 if(strlen(str) + strlen(s) < STRINGSZ)
1438                                         strcat(str, s);
1439                         }
1440                         if(strlen(str) + strlen(s) < STRINGSZ)
1441                                 strcat(str, ")");
1442                 }
1443                 if(et == TARRAY) {
1444                         n = t->width;
1445                         if(t->link && t->link->width)
1446                                 n /= t->link->width;
1447                         sprint(s, "[%ld]", n);
1448                         if(strlen(str) + strlen(s) < STRINGSZ)
1449                                 strcat(str, s);
1450                 }
1451                 if(t->nbits) {
1452                         sprint(s, " %d:%d", t->shift, t->nbits);
1453                         if(strlen(str) + strlen(s) < STRINGSZ)
1454                                 strcat(str, s);
1455                 }
1456                 if(typesu[et]) {
1457                         if(t->tag) {
1458                                 strcat(str, " ");
1459                                 if(strlen(str) + strlen(t->tag->name) < STRINGSZ)
1460                                         strcat(str, t->tag->name);
1461                         } else
1462                                 strcat(str, " {}");
1463                         break;
1464                 }
1465         }
1466         return fmtstrcpy(fp, str);
1467 }
1468
1469 int
1470 FNconv(Fmt *fp)
1471 {
1472         char *str;
1473         Node *n;
1474
1475         n = va_arg(fp->args, Node*);
1476         str = "<indirect>";
1477         if(n != Z && (n->op == ONAME || n->op == ODOT || n->op == OELEM))
1478                 str = n->sym->name;
1479         return fmtstrcpy(fp, str);
1480 }
1481
1482 int
1483 Qconv(Fmt *fp)
1484 {
1485         char str[STRINGSZ+20], *s;
1486         long b;
1487         int i;
1488
1489         str[0] = 0;
1490         for(b = va_arg(fp->args, long); b;) {
1491                 i = bitno(b);
1492                 if(str[0])
1493                         strcat(str, " ");
1494                 s = qnames[i];
1495                 if(strlen(str) + strlen(s) >= STRINGSZ)
1496                         break;
1497                 strcat(str, s);
1498                 b &= ~(1L << i);
1499         }
1500         return fmtstrcpy(fp, str);
1501 }
1502
1503 int
1504 VBconv(Fmt *fp)
1505 {
1506         char str[STRINGSZ];
1507         int i, n, t, pc;
1508
1509         n = va_arg(fp->args, int);
1510         pc = 0; /* BUG: was printcol */
1511         i = 0;
1512         while(pc < n) {
1513                 t = (pc+4) & ~3;
1514                 if(t <= n) {
1515                         str[i++] = '\t';
1516                         pc = t;
1517                         continue;
1518                 }
1519                 str[i++] = ' ';
1520                 pc++;
1521         }
1522         str[i] = 0;
1523
1524         return fmtstrcpy(fp, str);
1525 }
1526
1527 void
1528 setinclude(char *p)
1529 {
1530         int i;
1531         char *e;
1532
1533         while(*p != 0) {
1534                 e = strchr(p, ' ');
1535                 if(e != 0)
1536                         *e = '\0';
1537
1538                 for(i=1; i < ninclude; i++)
1539                         if(strcmp(p, include[i]) == 0)
1540                                 break;
1541
1542                 if(i >= ninclude){
1543                         if(ninclude >= nelem(include)) {
1544                                 diag(Z, "ninclude too small %d", nelem(include));
1545                                 exits("ninclude");
1546                         }
1547                         include[ninclude++] = p;
1548                 }
1549
1550                 if(e == 0)
1551                         break;
1552                 p = e+1;
1553         }
1554 }