]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/yacc.c
9bootfat: rename open() to fileinit and make it static as its really a internal funct...
[plan9front.git] / sys / src / cmd / yacc.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5
6 #define Bungetrune      Bungetc         /* ok for now. */
7
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET         ((32+NTERMS)/32)        /* BOTCH?? +31 */
12 #define BIT(a,i)        ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)     ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)       (((n)+32)/32)
15
16 #define PARSER          "/sys/lib/yaccpar"
17 #define PARSERS         "/sys/lib/yaccpars"
18 #define TEMPNAME        "y.tmp.XXXXXX"
19 #define ACTNAME         "y.acts.XXXXXX"
20 #define OFILE           "tab.c"
21 #define FILEU           "output"
22 #define FILED           "tab.h"
23 #define FILEDEBUG       "debug"
24
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31         ACTSIZE         = 40000,
32         MEMSIZE         = 40000,
33         NSTATES         = 2000,
34         NTERMS          = 511,
35         NPROD           = 1600,
36         NNONTERM        = 600,
37         TEMPSIZE        = 2000,
38         CNAMSZ          = 10000,
39         LSETSIZE        = 2400,
40         WSETSIZE        = 350,
41
42         NAMESIZE        = 50,
43         NTYPES          = 63,
44         ISIZE           = 400,
45
46         PRIVATE         = 0xE000,       /* unicode private use */
47
48         /* relationships which must hold:
49                 TBITSET ints must hold NTERMS+1 bits...
50                 WSETSIZE >= NNONTERM
51                 LSETSIZE >= NNONTERM
52                 TEMPSIZE >= NTERMS + NNONTERM + 1
53                 TEMPSIZE >= NSTATES
54         */
55
56         NTBASE          = 010000,
57         ERRCODE         = 8190,
58         ACCEPTCODE      = 8191,
59
60         NOASC           = 0,    /* no assoc. */
61         LASC            = 1,    /* left assoc. */
62         RASC            = 2,    /* right assoc. */
63         BASC            = 3,    /* binary assoc. */
64
65         /* flags for state generation */
66
67         DONE            = 0,
68         MUSTDO          = 1,
69         MUSTLOOKAHEAD   = 2,
70
71         /* flags for a rule having an action, and being reduced */
72
73         ACTFLAG         = 04,
74         REDFLAG         = 010,
75
76         /* output parser flags */
77         YYFLAG1         = -1000,
78
79         /* parse tokens */
80         IDENTIFIER      = PRIVATE,
81         MARK,
82         TERM,
83         LEFT,
84         RIGHT,
85         BINARY,
86         PREC,
87         LCURLY,
88         IDENTCOLON,
89         NUMBER,
90         START,
91         TYPEDEF,
92         TYPENAME,
93         UNION,
94
95         ENDFILE         = 0,
96
97         EMPTY           = 1,
98         WHOKNOWS        = 0,
99         OK              = 1,
100         NOMORE          = -1000,
101 };
102
103         /* macros for getting associativity and precedence levels */
104
105 #define ASSOC(i)        ((i)&03)
106 #define PLEVEL(i)       (((i)>>4)&077)
107 #define TYPE(i)         (((i)>>10)&077)
108
109         /* macros for setting associativity and precedence levels */
110
111 #define SETASC(i,j)     i |= j
112 #define SETPLEV(i,j)    i |= (j<<4)
113 #define SETTYPE(i,j)    i |= (j<<10)
114
115         /* looping macros */
116
117 #define TLOOP(i)        for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)       for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)      for(i=s; i<nprod; i++)
120 #define SLOOP(i)        for(i=0; i<nstate; i++)
121 #define WSBUMP(x)       x++
122 #define WSLOOP(s,j)     for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)  for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)      for(i=0; i<tbitset; i++)
125
126         /* command to clobber tempfiles after use */
127
128 #define ZAPFILE(x)      if(x) remove(x)
129
130         /* I/O descriptors */
131
132 Biobuf* faction;        /* file for saving actions */
133 Biobuf* fdefine;        /* file for #defines */
134 Biobuf* fdebug;         /* y.debug for strings for debugging */
135 Biobuf* ftable;         /* y.tab.c file */
136 Biobuf* ftemp;          /* tempfile to pass 2 */
137 Biobuf* finput;         /* input file */
138 Biobuf* foutput;        /* y.output file */
139
140         /* communication variables between various I/O routines */
141
142 char*   infile;                 /* input file name */
143 int     numbval;                /* value of an input number */
144 char    tokname[NAMESIZE+4];    /* input token name, slop for runes and 0 */
145
146         /* structure declarations */
147
148 typedef
149 struct
150 {
151         int     lset[TBITSET];
152 } Lkset;
153
154 typedef
155 struct
156 {
157         int*    pitem;
158         Lkset*  look;
159 } Item;
160
161 typedef
162 struct
163 {
164         char*   name;
165         int     value;
166 } Symb;
167
168 typedef
169 struct
170 {
171         int*    pitem;
172         int     flag;
173         Lkset   ws;
174 } Wset;
175
176         /* storage of names */
177
178 char    cnames[CNAMSZ];         /* place where token and nonterminal names are stored */
179 int     cnamsz = CNAMSZ;        /* size of cnames */
180 char*   cnamp = cnames;         /* place where next name is to be put in */
181 int     ndefout = 4;            /* number of defined symbols output */
182 char*   tempname;
183 char*   actname;
184 char    ttempname[] = TEMPNAME;
185 char    tactname[] = ACTNAME;
186 char*   parser = PARSER;
187 char*   yydebug;
188
189         /* storage of types */
190 int     ntypes;                 /* number of types defined */
191 char*   typeset[NTYPES];        /* pointers to type tags */
192
193         /* token information */
194
195 int     ntokens = 0 ;           /* number of tokens */
196 Symb    tokset[NTERMS];
197 int     toklev[NTERMS];         /* vector with the precedence of the terminals */
198
199         /* nonterminal information */
200
201 int     nnonter = -1;           /* the number of nonterminals */
202 Symb    nontrst[NNONTERM];
203 int     start;                  /* start symbol */
204
205         /* assigned token type values */
206 int     extval = 0;
207
208 char*   ytabc = OFILE;  /* name of y.tab.c */
209
210         /* grammar rule information */
211
212 int     mem0[MEMSIZE] ;         /* production storage */
213 int*    mem = mem0;
214 int     nprod = 1;              /* number of productions */
215 int*    prdptr[NPROD];          /* pointers to descriptions of productions */
216 int     levprd[NPROD];          /* precedence levels for the productions */
217 int     rlines[NPROD];          /* line number for this rule */
218
219         /* state information */
220
221 int     nstate = 0;             /* number of states */
222 Item*   pstate[NSTATES+2];      /* pointers to the descriptions of the states */
223 int     tystate[NSTATES];       /* contains type information about the states */
224 int     defact[NSTATES];        /* the default actions of states */
225 int     tstates[NTERMS];        /* states generated by terminal gotos */
226 int     ntstates[NNONTERM];     /* states generated by nonterminal gotos */
227 int     mstates[NSTATES];       /* chain of overflows of term/nonterm generation lists  */
228 int     lastred;                /* the number of the last reduction of a state */
229
230         /* lookahead set information */
231
232 Lkset   lkst[LSETSIZE];
233 int     nolook;                 /* flag to turn off lookahead computations */
234 int     tbitset;                /* size of lookahead sets */
235 int     nlset = 0;              /* next lookahead set index */
236 int     nolook = 0;             /* flag to suppress lookahead computations */
237 Lkset   clset;                  /* temporary storage for lookahead computations */
238
239         /* working set information */
240
241 Wset    wsets[WSETSIZE];
242 Wset*   cwp;
243
244         /* storage for action table */
245
246 int     amem[ACTSIZE];          /* action table storage */
247 int*    memp = amem;            /* next free action table position */
248 int     indgo[NSTATES];         /* index to the stored goto table */
249
250         /* temporary vector, indexable by states, terms, or ntokens */
251
252 int     temp1[TEMPSIZE];        /* temporary storage, indexed by terms + ntokens or states */
253 int     lineno = 1;             /* current input line number */
254 int     fatfl = 1;              /* if on, error is fatal */
255 int     nerrors = 0;            /* number of errors */
256
257         /* statistics collection variables */
258
259 int     zzgoent;
260 int     zzgobest;
261 int     zzacent;
262 int     zzexcp;
263 int     zzclose;
264 int     zzrrconf;
265 int     zzsrconf;
266
267 int*    ggreed = lkst[0].lset;
268 int*    pgo = wsets[0].ws.lset;
269 int*    yypgo = &nontrst[0].value;
270
271 int     maxspr = 0;             /* maximum spread of any entry */
272 int     maxoff = 0;             /* maximum offset into a array */
273 int*    pmem = mem0;
274 int*    maxa;
275 int     nxdb = 0;
276 int     adb = 0;
277
278
279         /* storage for information about the nonterminals */
280
281 int**   pres[NNONTERM+2];       /* vector of pointers to productions yielding each nonterminal */
282 Lkset*  pfirst[NNONTERM+2];     /* vector of pointers to first sets for each nonterminal */
283 int     pempty[NNONTERM+1];     /* vector of nonterminals nontrivially deriving e */
284
285         /* random stuff picked out from between functions */
286
287 int     indebug = 0;
288 Wset*   zzcwp = wsets;
289 int     zzgoent = 0;
290 int     zzgobest = 0;
291 int     zzacent = 0;
292 int     zzexcp = 0;
293 int     zzclose = 0;
294 int     zzsrconf = 0;
295 int*    zzmemsz = mem0;
296 int     zzrrconf = 0;
297 int     pidebug = 0;            /* debugging flag for putitem */
298 int     gsdebug = 0;
299 int     cldebug = 0;            /* debugging flag for closure */
300 int     pkdebug = 0;
301 int     g2debug = 0;
302
303 struct
304 {
305         char*   name;
306         long    value;
307 } resrv[] =
308 {
309         "binary",       BINARY,
310         "left",         LEFT,
311         "nonassoc",     BINARY,
312         "prec",         PREC,
313         "right",        RIGHT,
314         "start",        START,
315         "term",         TERM,
316         "token",        TERM,
317         "type",         TYPEDEF,
318         "union",        UNION,
319         0,
320 };
321
322         /* define functions */
323
324 void    main(int, char**);
325 void    others(void);
326 char*   chcopy(char*, char*);
327 char*   writem(int*);
328 char*   symnam(int);
329 void    summary(void);
330 void    error(char*, ...);
331 void    aryfil(int*, int, int);
332 int     setunion(int*, int*);
333 void    prlook(Lkset*);
334 void    cpres(void);
335 void    cpfir(void);
336 int     state(int);
337 void    putitem(int*, Lkset*);
338 void    cempty(void);
339 void    stagen(void);
340 void    closure(int);
341 Lkset*  flset(Lkset*);
342 void    cleantmp(void);
343 void    intr(void);
344 void    setup(int, char**);
345 void    finact(void);
346 int     defin(int, char*);
347 void    defout(int);
348 char*   cstash(char*);
349 long    gettok(void);
350 int     fdtype(int);
351 int     chfind(int, char*);
352 void    cpyunion(void);
353 void    cpycode(void);
354 int     skipcom(void);
355 void    cpyact(int);
356 void    openup(char*, int, int, int, char*);
357 void    output(void);
358 int     apack(int*, int);
359 void    go2out(void);
360 void    go2gen(int);
361 void    precftn(int, int, int);
362 void    wract(int);
363 void    wrstate(int);
364 void    warray(char*, int*, int);
365 void    hideprod(void);
366 void    callopt(void);
367 void    gin(int);
368 void    stin(int);
369 int     nxti(void);
370 void    osummary(void);
371 void    aoutput(void);
372 void    arout(char*, int*, int);
373 int     gtnm(void);
374
375 void
376 main(int argc, char *argv[])
377 {
378
379         setup(argc, argv);      /* initialize and read productions */
380         tbitset = NWORDS(ntokens);
381         cpres();                /* make table of which productions yield a given nonterminal */
382         cempty();               /* make a table of which nonterminals can match the empty string */
383         cpfir();                /* make a table of firsts of nonterminals */
384         stagen();               /* generate the states */
385         output();               /* write the states and the tables */
386         go2out();
387         hideprod();
388         summary();
389         callopt();
390         others();
391         exits(0);
392 }
393
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
398 others(void)
399 {
400         int c, i, j;
401
402         finput = Bopen(parser, OREAD);
403         if(finput == 0)
404                 error("cannot find parser %s", parser);
405         warray("yyr1", levprd, nprod);
406         aryfil(temp1, nprod, 0);
407         PLOOP(1, i)
408                 temp1[i] = prdptr[i+1]-prdptr[i]-2;
409         warray("yyr2", temp1, nprod);
410
411         aryfil(temp1, nstate, -1000);
412         TLOOP(i)
413                 for(j=tstates[i]; j!=0; j=mstates[j])
414                         temp1[j] = i;
415         NTLOOP(i)
416                 for(j=ntstates[i]; j!=0; j=mstates[j])
417                         temp1[j] = -i;
418         warray("yychk", temp1, nstate);
419         warray("yydef", defact, nstate);
420
421         /* put out token translation tables */
422         /* table 1 has 0-256 */
423         aryfil(temp1, 256, 0);
424         c = 0;
425         TLOOP(i) {
426                 j = tokset[i].value;
427                 if(j >= 0 && j < 256) {
428                         if(temp1[j]) {
429                                 print("yacc bug -- cant have 2 different Ts with same value\n");
430                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431                                 nerrors++;
432                         }
433                         temp1[j] = i;
434                         if(j > c)
435                                 c = j;
436                 }
437         }
438         warray("yytok1", temp1, c+1);
439
440         /* table 2 has PRIVATE-PRIVATE+256 */
441         aryfil(temp1, 256, 0);
442         c = 0;
443         TLOOP(i) {
444                 j = tokset[i].value - PRIVATE;
445                 if(j >= 0 && j < 256) {
446                         if(temp1[j]) {
447                                 print("yacc bug -- cant have 2 different Ts with same value\n");
448                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449                                 nerrors++;
450                         }
451                         temp1[j] = i;
452                         if(j > c)
453                                 c = j;
454                 }
455         }
456         warray("yytok2", temp1, c+1);
457
458         /* table 3 has everything else */
459         Bprint(ftable, "long    yytok3[] =\n{\n");
460         c = 0;
461         TLOOP(i) {
462                 j = tokset[i].value;
463                 if(j >= 0 && j < 256)
464                         continue;
465                 if(j >= PRIVATE && j < 256+PRIVATE)
466                         continue;
467
468                 Bprint(ftable, "%4d,%4d,", j, i);
469                 c++;
470                 if(c%5 == 0)
471                         Bprint(ftable, "\n");
472         }
473         Bprint(ftable, "%4d\n};\n", 0);
474
475         /* copy parser text */
476         while((c=Bgetrune(finput)) != Beof) {
477                 if(c == '$') {
478                         if((c = Bgetrune(finput)) != 'A')
479                                 Bputrune(ftable, '$');
480                         else { /* copy actions */
481                                 faction = Bopen(actname, OREAD);
482                                 if(faction == 0)
483                                         error("cannot reopen action tempfile");
484                                 while((c=Bgetrune(faction)) != Beof)
485                                         Bputrune(ftable, c);
486                                 Bterm(faction);
487                                 ZAPFILE(actname);
488                                 c = Bgetrune(finput);
489                         }
490                 }
491                 Bputrune(ftable, c);
492         }
493         Bterm(ftable);
494 }
495
496 /*
497  * copies string q into p, returning next free char ptr
498  */
499 char*
500 chcopy(char* p, char* q)
501 {
502         int c;
503
504         while(c = *q) {
505                 if(c == '"')
506                         *p++ = '\\';
507                 *p++ = c;
508                 q++;
509         }
510         *p = 0;
511         return p;
512 }
513
514 /*
515  * creates output string for item pointed to by pp
516  */
517 char*
518 writem(int *pp)
519 {
520         int i,*p;
521         static char sarr[ISIZE];
522         char* q;
523
524         for(p=pp; *p>0; p++)
525                 ;
526         p = prdptr[-*p];
527         q = chcopy(sarr, nontrst[*p-NTBASE].name);
528         q = chcopy(q, ": ");
529         for(;;) {
530                 *q = ' ';
531                 p++;
532                 if(p == pp)
533                         *q = '.';
534                 q++;
535                 *q = '\0';
536                 i = *p;
537                 if(i <= 0)
538                         break;
539                 q = chcopy(q, symnam(i));
540                 if(q > &sarr[ISIZE-30])
541                         error("item too big");
542         }
543
544         /* an item calling for a reduction */
545         i = *pp;
546         if(i < 0 ) {
547                 q = chcopy(q, "    (");
548                 sprint(q, "%d)", -i);
549         }
550         return sarr;
551 }
552
553 /*
554  * return a pointer to the name of symbol i
555  */
556 char*
557 symnam(int i)
558 {
559         char* cp;
560
561         cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562         if(*cp == ' ')
563                 cp++;
564         return cp;
565 }
566
567 /*
568  * output the summary on y.output
569  */
570 void
571 summary(void)
572 {
573
574         if(foutput != 0) {
575                 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576                         ntokens, NTERMS, nnonter, NNONTERM);
577                 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578                         nprod, NPROD, nstate, NSTATES);
579                 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580                         zzsrconf, zzrrconf);
581                 Bprint(foutput, "%d/%d working sets used\n",
582                         (int)(zzcwp-wsets), WSETSIZE);
583                 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584                         (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585                 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586                 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587                 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588                 Bprint(foutput, "%d goto entries\n", zzgoent);
589                 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590         }
591         if(zzsrconf != 0 || zzrrconf != 0) {
592                 print("\nconflicts: ");
593                 if(zzsrconf)
594                         print("%d shift/reduce", zzsrconf);
595                 if(zzsrconf && zzrrconf)
596                         print(", ");
597                 if(zzrrconf)
598                         print("%d reduce/reduce", zzrrconf);
599                 print("\n");
600         }
601         if(ftemp != 0) {
602                 Bterm(ftemp);
603                 ftemp = 0;
604         }
605         if(fdefine != 0) {
606                 Bterm(fdefine);
607                 fdefine = 0;
608         }
609 }
610
611 /*
612  * write out error comment -- NEEDS WORK
613  */
614 void
615 error(char *s, ...)
616 {
617
618         nerrors++;
619         fprint(2, "\n fatal error:");
620         fprint(2, s, (&s)[1]);
621         fprint(2, ", %s:%d\n", infile, lineno);
622         if(!fatfl)
623                 return;
624         summary();
625         cleantmp();
626         exits("error");
627 }
628
629 /*
630  * set elements 0 through n-1 to c
631  */
632 void
633 aryfil(int *v, int n, int c)
634 {
635         int i;
636
637         for(i=0; i<n; i++)
638                 v[i] = c;
639 }
640
641 /*
642  * set a to the union of a and b
643  * return 1 if b is not a subset of a, 0 otherwise
644  */
645 int
646 setunion(int *a, int *b)
647 {
648         int i, x, sub;
649
650         sub = 0;
651         SETLOOP(i) {
652                 x = *a;
653                 *a |= *b;
654                 if(*a != x)
655                         sub = 1;
656                 a++;
657                 b++;
658         }
659         return sub;
660 }
661
662 void
663 prlook(Lkset* p)
664 {
665         int j, *pp;
666
667         pp = p->lset;
668         if(pp == 0)
669                 Bprint(foutput, "\tNULL");
670         else {
671                 Bprint(foutput, " { ");
672                 TLOOP(j)
673                         if(BIT(pp,j))
674                                 Bprint(foutput, "%s ", symnam(j));
675                 Bprint(foutput, "}");
676         }
677 }
678
679 /*
680  * compute an array with the beginnings of  productions yielding given nonterminals
681  * The array pres points to these lists
682  * the array pyield has the lists: the total size is only NPROD+1
683  */
684 void
685 cpres(void)
686 {
687         int c, j, i, **pmem;
688         static int *pyield[NPROD];
689
690         pmem = pyield;
691         NTLOOP(i) {
692                 c = i+NTBASE;
693                 pres[i] = pmem;
694                 fatfl = 0;      /* make undefined  symbols  nonfatal */
695                 PLOOP(0, j)
696                         if(*prdptr[j] == c)
697                                 *pmem++ =  prdptr[j]+1;
698                 if(pres[i] == pmem)
699                         error("nonterminal %s not defined!", nontrst[i].name);
700         }
701         pres[i] = pmem;
702         fatfl = 1;
703         if(nerrors) {
704                 summary();
705                 cleantmp();
706                 exits("error");
707         }
708         if(pmem != &pyield[nprod])
709                 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
710 }
711
712 /*
713  * compute an array with the first of nonterminals
714  */
715 void
716 cpfir(void)
717 {
718         int *p, **s, i, **t, ch, changes;
719
720         zzcwp = &wsets[nnonter];
721         NTLOOP(i) {
722                 aryfil(wsets[i].ws.lset, tbitset, 0);
723                 t = pres[i+1];
724                 /* initially fill the sets */
725                 for(s=pres[i]; s<t; ++s)
726                         for(p = *s; (ch = *p) > 0; ++p) {
727                                 if(ch < NTBASE) {
728                                         SETBIT(wsets[i].ws.lset, ch);
729                                         break;
730                                 }
731                                 if(!pempty[ch-NTBASE])
732                                         break;
733                         }
734         }
735
736         /* now, reflect transitivity */
737         changes = 1;
738         while(changes) {
739                 changes = 0;
740                 NTLOOP(i) {
741                         t = pres[i+1];
742                         for(s = pres[i]; s < t; ++s)
743                                 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744                                         changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745                                         if(!pempty[ch])
746                                                 break;
747                                 }
748                 }
749         }
750
751         NTLOOP(i)
752                 pfirst[i] = flset(&wsets[i].ws);
753         if(!indebug)
754                 return;
755         if(foutput != 0)
756                 NTLOOP(i) {
757                         Bprint(foutput, "\n%s: ", nontrst[i].name);
758                         prlook(pfirst[i]);
759                         Bprint(foutput, " %d\n", pempty[i]);
760                 }
761 }
762
763 /*
764  * sorts last state,and sees if it equals earlier ones. returns state number
765  */
766 int
767 state(int c)
768 {
769         Item *p1, *p2, *k, *l, *q1, *q2;
770         int size1, size2, i;
771
772         p1 = pstate[nstate];
773         p2 = pstate[nstate+1];
774         if(p1 == p2)
775                 return 0;       /* null state */
776         /* sort the items */
777         for(k = p2-1; k > p1; k--)      /* make k the biggest */
778                 for(l = k-1; l >= p1; --l)
779                         if(l->pitem > k->pitem) {
780                                 int *s;
781                                 Lkset *ss;
782
783                                 s = k->pitem;
784                                 k->pitem = l->pitem;
785                                 l->pitem = s;
786                                 ss = k->look;
787                                 k->look = l->look;
788                                 l->look = ss;
789                         }
790         size1 = p2 - p1;        /* size of state */
791
792         for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793                 /* get ith state */
794                 q1 = pstate[i];
795                 q2 = pstate[i+1];
796                 size2 = q2 - q1;
797                 if(size1 != size2)
798                         continue;
799                 k = p1;
800                 for(l = q1; l < q2; l++) {
801                         if(l->pitem != k->pitem)
802                                 break;
803                         k++;
804                 }
805                 if(l != q2)
806                         continue;
807                 /* found it */
808                 pstate[nstate+1] = pstate[nstate];      /* delete last state */
809                 /* fix up lookaheads */
810                 if(nolook)
811                         return i;
812                 for(l = q1, k = p1; l < q2; ++l, ++k ) {
813                         int s;
814
815                         SETLOOP(s)
816                                 clset.lset[s] = l->look->lset[s];
817                         if(setunion(clset.lset, k->look->lset)) {
818                                 tystate[i] = MUSTDO;
819                                 /* register the new set */
820                                 l->look = flset( &clset );
821                         }
822                 }
823                 return i;
824         }
825         /* state is new */
826         if(nolook)
827                 error("yacc state/nolook error");
828         pstate[nstate+2] = p2;
829         if(nstate+1 >= NSTATES)
830                 error("too many states");
831         if(c >= NTBASE) {
832                 mstates[nstate] = ntstates[c-NTBASE];
833                 ntstates[c-NTBASE] = nstate;
834         } else {
835                 mstates[nstate] = tstates[c];
836                 tstates[c] = nstate;
837         }
838         tystate[nstate] = MUSTDO;
839         return nstate++;
840 }
841
842 void
843 putitem(int *ptr, Lkset *lptr)
844 {
845         Item *j;
846
847         if(pidebug && foutput != 0)
848                 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849         j = pstate[nstate+1];
850         j->pitem = ptr;
851         if(!nolook)
852                 j->look = flset(lptr);
853         pstate[nstate+1] = ++j;
854         if((int*)j > zzmemsz) {
855                 zzmemsz = (int*)j;
856                 if(zzmemsz >=  &mem0[MEMSIZE])
857                         error("out of state space");
858         }
859 }
860
861 /*
862  * mark nonterminals which derive the empty string
863  * also, look for nonterminals which don't derive any token strings
864  */
865 void
866 cempty(void)
867 {
868
869         int i, *p;
870
871         /* first, use the array pempty to detect productions that can never be reduced */
872         /* set pempty to WHONOWS */
873         aryfil(pempty, nnonter+1, WHOKNOWS);
874
875         /* now, look at productions, marking nonterminals which derive something */
876 more:
877         PLOOP(0, i) {
878                 if(pempty[*prdptr[i] - NTBASE])
879                         continue;
880                 for(p = prdptr[i]+1; *p >= 0; ++p)
881                         if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882                                 break;
883                 /* production can be derived */
884                 if(*p < 0) {
885                         pempty[*prdptr[i]-NTBASE] = OK;
886                         goto more;
887                 }
888         }
889
890         /* now, look at the nonterminals, to see if they are all OK */
891         NTLOOP(i) {
892                 /* the added production rises or falls as the start symbol ... */
893                 if(i == 0)
894                         continue;
895                 if(pempty[i] != OK) {
896                         fatfl = 0;
897                         error("nonterminal %s never derives any token string", nontrst[i].name);
898                 }
899         }
900
901         if(nerrors) {
902                 summary();
903                 cleantmp();
904                 exits("error");
905         }
906
907         /* now, compute the pempty array, to see which nonterminals derive the empty string */
908         /* set pempty to WHOKNOWS */
909         aryfil( pempty, nnonter+1, WHOKNOWS);
910
911         /* loop as long as we keep finding empty nonterminals */
912
913 again:
914         PLOOP(1, i) {
915                 /* not known to be empty */
916                 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917                         for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
918                                 ;
919                         /* we have a nontrivially empty nonterminal */
920                         if(*p < 0) {
921                                 pempty[*prdptr[i]-NTBASE] = EMPTY;
922                                 /* got one ... try for another */
923                                 goto again;
924                         }
925                 }
926         }
927 }
928
929 /*
930  * generate the states
931  */
932 void
933 stagen(void)
934 {
935
936         int c, i, j, more;
937         Wset *p, *q;
938
939         /* initialize */
940         nstate = 0;
941
942         /* THIS IS FUNNY from the standpoint of portability
943          * it represents the magic moment when the mem0 array, which has
944          * been holding the productions, starts to hold item pointers, of a
945          * different type...
946          * someday, alloc should be used to allocate all this stuff... for now, we
947          * accept that if pointers don't fit in integers, there is a problem...
948          */
949
950         pstate[0] = pstate[1] = (Item*)mem;
951         aryfil(clset.lset, tbitset, 0);
952         putitem(prdptr[0]+1, &clset);
953         tystate[0] = MUSTDO;
954         nstate = 1;
955         pstate[2] = pstate[1];
956
957         aryfil(amem, ACTSIZE, 0);
958
959         /* now, the main state generation loop */
960         for(more=1; more;) {
961                 more = 0;
962                 SLOOP(i) {
963                         if(tystate[i] != MUSTDO)
964                                 continue;
965                         tystate[i] = DONE;
966                         aryfil(temp1, nnonter+1, 0);
967                         /* take state i, close it, and do gotos */
968                         closure(i);
969                         /* generate goto's */
970                         WSLOOP(wsets, p) {
971                                 if(p->flag)
972                                         continue;
973                                 p->flag = 1;
974                                 c = *(p->pitem);
975                                 if(c <= 1) {
976                                         if(pstate[i+1]-pstate[i] <= p-wsets)
977                                                 tystate[i] = MUSTLOOKAHEAD;
978                                         continue;
979                                 }
980                                 /* do a goto on c */
981                                 WSLOOP(p, q)
982                                         /* this item contributes to the goto */
983                                         if(c == *(q->pitem)) {
984                                                 putitem(q->pitem+1, &q->ws);
985                                                 q->flag = 1;
986                                         }
987                                 if(c < NTBASE)
988                                         state(c);       /* register new state */
989                                 else
990                                         temp1[c-NTBASE] = state(c);
991                         }
992                         if(gsdebug && foutput != 0) {
993                                 Bprint(foutput, "%d: ", i);
994                                 NTLOOP(j)
995                                         if(temp1[j])
996                                                 Bprint(foutput, "%s %d, ",
997                                                 nontrst[j].name, temp1[j]);
998                                 Bprint(foutput, "\n");
999                         }
1000                         indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001                         /* do some more */
1002                         more = 1;
1003                 }
1004         }
1005 }
1006
1007 /*
1008  * generate the closure of state i
1009  */
1010 void
1011 closure(int i)
1012 {
1013
1014         Wset *u, *v;
1015         Item *p, *q;
1016         int c, ch, work, k, *pi, **s, **t;
1017
1018         zzclose++;
1019
1020         /* first, copy kernel of state i to wsets */
1021         cwp = wsets;
1022         ITMLOOP(i, p, q) {
1023                 cwp->pitem = p->pitem;
1024                 cwp->flag = 1;                  /* this item must get closed */
1025                 SETLOOP(k)
1026                         cwp->ws.lset[k] = p->look->lset[k];
1027                 WSBUMP(cwp);
1028         }
1029
1030         /* now, go through the loop, closing each item */
1031         work = 1;
1032         while(work) {
1033                 work = 0;
1034                 WSLOOP(wsets, u) {
1035                         if(u->flag == 0)
1036                                 continue;
1037                         /* dot is before c */
1038                         c = *(u->pitem);
1039                         if(c < NTBASE) {
1040                                 u->flag = 0;
1041                                 /* only interesting case is where . is before nonterminal */
1042                                 continue;
1043                         }
1044
1045                         /* compute the lookahead */
1046                         aryfil(clset.lset, tbitset, 0);
1047
1048                         /* find items involving c */
1049                         WSLOOP(u, v)
1050                                 if(v->flag == 1 && *(pi=v->pitem) == c) {
1051                                         v->flag = 0;
1052                                         if(nolook)
1053                                                 continue;
1054                                         while((ch = *++pi) > 0) {
1055                                                 /* terminal symbol */
1056                                                 if(ch < NTBASE) {
1057                                                         SETBIT(clset.lset, ch);
1058                                                         break;
1059                                                 }
1060                                                 /* nonterminal symbol */
1061                                                 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062                                                 if(!pempty[ch-NTBASE])
1063                                                         break;
1064                                         }
1065                                         if(ch <= 0)
1066                                                 setunion(clset.lset, v->ws.lset);
1067                                 }
1068
1069                         /*
1070                          * now loop over productions derived from c
1071                          * c is now nonterminal number
1072                          */
1073                         c -= NTBASE;
1074                         t = pres[c+1];
1075                         for(s = pres[c]; s < t; ++s) {
1076                                 /*
1077                                  * put these items into the closure
1078                                  * is the item there
1079                                  */
1080                                 WSLOOP(wsets, v)
1081                                         /* yes, it is there */
1082                                         if(v->pitem == *s) {
1083                                                 if(nolook)
1084                                                         goto nexts;
1085                                                 if(setunion(v->ws.lset, clset.lset))
1086                                                         v->flag = work = 1;
1087                                                 goto nexts;
1088                                         }
1089
1090                                 /*  not there; make a new entry */
1091                                 if(cwp-wsets+1 >= WSETSIZE)
1092                                         error( "working set overflow");
1093                                 cwp->pitem = *s;
1094                                 cwp->flag = 1;
1095                                 if(!nolook) {
1096                                         work = 1;
1097                                         SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1098                                 }
1099                                 WSBUMP(cwp);
1100
1101                         nexts:;
1102                         }
1103                 }
1104         }
1105
1106         /* have computed closure; flags are reset; return */
1107         if(cwp > zzcwp)
1108                 zzcwp = cwp;
1109         if(cldebug && foutput != 0) {
1110                 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111                 WSLOOP(wsets, u) {
1112                         if(u->flag)
1113                                 Bprint(foutput, "flag set!\n");
1114                         u->flag = 0;
1115                         Bprint(foutput, "\t%s", writem(u->pitem));
1116                         prlook(&u->ws);
1117                         Bprint(foutput, "\n");
1118                 }
1119         }
1120 }
1121
1122 /*
1123  * decide if the lookahead set pointed to by p is known
1124  * return pointer to a perminent location for the set
1125  */
1126 Lkset*
1127 flset(Lkset *p)
1128 {
1129         Lkset *q;
1130         int *u, *v, *w, j;
1131
1132         for(q = &lkst[nlset]; q-- > lkst;) {
1133                 u = p->lset;
1134                 v = q->lset;
1135                 w = &v[tbitset];
1136                 while(v < w)
1137                         if(*u++ != *v++)
1138                                 goto more;
1139                 /* we have matched */
1140                 return q;
1141         more:;
1142         }
1143         /* add a new one */
1144         q = &lkst[nlset++];
1145         if(nlset >= LSETSIZE)
1146                 error("too many lookahead sets");
1147         SETLOOP(j)
1148                 q->lset[j] = p->lset[j];
1149         return q;
1150 }
1151
1152 void
1153 cleantmp(void)
1154 {
1155         ZAPFILE(actname);
1156         ZAPFILE(tempname);
1157 }
1158
1159 void
1160 intr(void)
1161 {
1162         cleantmp();
1163         exits("interrupted");
1164 }
1165
1166 void
1167 usage(void)
1168 {
1169         fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1170         exits("usage");
1171 }
1172
1173 void
1174 setup(int argc, char *argv[])
1175 {
1176         long c, t;
1177         int i, j, lev, ty, ytab, *p;
1178         int vflag, dflag, stem;
1179         char actnm[8], *stemc, *s, dirbuf[128];
1180
1181         ytab = 0;
1182         vflag = 0;
1183         dflag = 0;
1184         stem = 0;
1185         stemc = "y";
1186         foutput = 0;
1187         fdefine = 0;
1188         fdebug = 0;
1189         ARGBEGIN{
1190         case 'v':
1191         case 'V':
1192                 vflag++;
1193                 break;
1194         case 'D':
1195                 yydebug = EARGF(usage());
1196                 break;
1197         case 'd':
1198                 dflag++;
1199                 break;
1200         case 'o':
1201                 ytab++;
1202                 ytabc = EARGF(usage());
1203                 break;
1204         case 's':
1205                 stem++;
1206                 stemc = ARGF();
1207                 break;
1208         case 'S':
1209                 parser = PARSERS;
1210                 break;
1211         default:
1212                 error("illegal option: %c", ARGC());
1213         }ARGEND
1214         openup(stemc, dflag, vflag, ytab, ytabc);
1215
1216         ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1217         faction = Bopen(actname = mktemp(tactname), OWRITE);
1218         if(ftemp == 0 || faction == 0)
1219                 error("cannot open temp file");
1220         if(argc < 1)
1221                 error("no input file");
1222         infile = argv[0];
1223         if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1224                 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1225                 s = malloc(i);
1226                 if(s != nil){
1227                         snprint(s, i, "%s/%s", dirbuf, infile);
1228                         cleanname(s);
1229                         infile = s;
1230                 }
1231         }
1232         finput = Bopen(infile, OREAD);
1233         if(finput == 0)
1234                 error("cannot open '%s'", argv[0]);
1235         cnamp = cnames;
1236
1237         defin(0, "$end");
1238         extval = PRIVATE;       /* tokens start in unicode 'private use' */
1239         defin(0, "error");
1240         defin(1, "$accept");
1241         defin(0, "$unk");
1242         mem = mem0;
1243         i = 0;
1244
1245         for(t = gettok(); t != MARK && t != ENDFILE;)
1246         switch(t) {
1247         case ';':
1248                 t = gettok();
1249                 break;
1250
1251         case START:
1252                 if(gettok() != IDENTIFIER)
1253                         error("bad %%start construction");
1254                 start = chfind(1, tokname);
1255                 t = gettok();
1256                 continue;
1257
1258         case TYPEDEF:
1259                 if(gettok() != TYPENAME)
1260                         error("bad syntax in %%type");
1261                 ty = numbval;
1262                 for(;;) {
1263                         t = gettok();
1264                         switch(t) {
1265                         case IDENTIFIER:
1266                                 if((t=chfind(1, tokname)) < NTBASE) {
1267                                         j = TYPE(toklev[t]);
1268                                         if(j != 0 && j != ty)
1269                                                 error("type redeclaration of token %s",
1270                                                         tokset[t].name);
1271                                         else
1272                                                 SETTYPE(toklev[t], ty);
1273                                 } else {
1274                                         j = nontrst[t-NTBASE].value;
1275                                         if(j != 0 && j != ty)
1276                                                 error("type redeclaration of nonterminal %s",
1277                                                         nontrst[t-NTBASE].name );
1278                                         else
1279                                                 nontrst[t-NTBASE].value = ty;
1280                                 }
1281                         case ',':
1282                                 continue;
1283                         case ';':
1284                                 t = gettok();
1285                         default:
1286                                 break;
1287                         }
1288                         break;
1289                 }
1290                 continue;
1291
1292         case UNION:
1293                 /* copy the union declaration to the output */
1294                 cpyunion();
1295                 t = gettok();
1296                 continue;
1297
1298         case LEFT:
1299         case BINARY:
1300         case RIGHT:
1301                 i++;
1302
1303         case TERM:
1304                 /* nonzero means new prec. and assoc. */
1305                 lev = t-TERM;
1306                 ty = 0;
1307
1308                 /* get identifiers so defined */
1309                 t = gettok();
1310
1311                 /* there is a type defined */
1312                 if(t == TYPENAME) {
1313                         ty = numbval;
1314                         t = gettok();
1315                 }
1316                 for(;;) {
1317                         switch(t) {
1318                         case ',':
1319                                 t = gettok();
1320                                 continue;
1321
1322                         case ';':
1323                                 break;
1324
1325                         case IDENTIFIER:
1326                                 j = chfind(0, tokname);
1327                                 if(j >= NTBASE)
1328                                         error("%s defined earlier as nonterminal", tokname);
1329                                 if(lev) {
1330                                         if(ASSOC(toklev[j]))
1331                                                 error("redeclaration of precedence of %s", tokname);
1332                                         SETASC(toklev[j], lev);
1333                                         SETPLEV(toklev[j], i);
1334                                 }
1335                                 if(ty) {
1336                                         if(TYPE(toklev[j]))
1337                                                 error("redeclaration of type of %s", tokname);
1338                                         SETTYPE(toklev[j],ty);
1339                                 }
1340                                 t = gettok();
1341                                 if(t == NUMBER) {
1342                                         tokset[j].value = numbval;
1343                                         if(j < ndefout && j > 3)
1344                                                 error("please define type number of %s earlier",
1345                                                         tokset[j].name);
1346                                         t = gettok();
1347                                 }
1348                                 continue;
1349                         }
1350                         break;
1351                 }
1352                 continue;
1353
1354         case LCURLY:
1355                 defout(0);
1356                 cpycode();
1357                 t = gettok();
1358                 continue;
1359
1360         default:
1361                 error("syntax error");
1362         }
1363         if(t == ENDFILE)
1364                 error("unexpected EOF before %%");
1365
1366         /* t is MARK */
1367         Bprint(ftable, "extern  int     yyerrflag;\n");
1368         Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1369         Bprint(ftable, "#define YYMAXDEPTH      150\n");
1370         Bprint(ftable, "#endif\n" );
1371         if(!ntypes) {
1372                 Bprint(ftable, "#ifndef YYSTYPE\n");
1373                 Bprint(ftable, "#define YYSTYPE int\n");
1374                 Bprint(ftable, "#endif\n");
1375         }
1376         Bprint(ftable, "YYSTYPE yylval;\n");
1377         Bprint(ftable, "YYSTYPE yyval;\n");
1378
1379         prdptr[0] = mem;
1380
1381         /* added production */
1382         *mem++ = NTBASE;
1383
1384         /* if start is 0, we will overwrite with the lhs of the first rule */
1385         *mem++ = start;
1386         *mem++ = 1;
1387         *mem++ = 0;
1388         prdptr[1] = mem;
1389         while((t=gettok()) == LCURLY)
1390                 cpycode();
1391         if(t != IDENTCOLON)
1392                 error("bad syntax on first rule");
1393
1394         if(!start)
1395                 prdptr[0][1] = chfind(1, tokname);
1396
1397         /* read rules */
1398         while(t != MARK && t != ENDFILE) {
1399                 /* process a rule */
1400                 rlines[nprod] = lineno;
1401                 if(t == '|')
1402                         *mem++ = *prdptr[nprod-1];
1403                 else
1404                         if(t == IDENTCOLON) {
1405                                 *mem = chfind(1, tokname);
1406                                 if(*mem < NTBASE)
1407                                         error("token illegal on LHS of grammar rule");
1408                                 mem++;
1409                         } else
1410                                 error("illegal rule: missing semicolon or | ?");
1411                 /* read rule body */
1412                 t = gettok();
1413
1414         more_rule:
1415                 while(t == IDENTIFIER) {
1416                         *mem = chfind(1, tokname);
1417                         if(*mem < NTBASE)
1418                                 levprd[nprod] = toklev[*mem];
1419                         mem++;
1420                         t = gettok();
1421                 }
1422                 if(t == PREC) {
1423                         if(gettok() != IDENTIFIER)
1424                                 error("illegal %%prec syntax");
1425                         j = chfind(2, tokname);
1426                         if(j >= NTBASE)
1427                                 error("nonterminal %s illegal after %%prec",
1428                                         nontrst[j-NTBASE].name);
1429                         levprd[nprod] = toklev[j];
1430                         t = gettok();
1431                 }
1432                 if(t == '=') {
1433                         levprd[nprod] |= ACTFLAG;
1434                         Bprint(faction, "\ncase %d:", nprod);
1435                         cpyact(mem-prdptr[nprod]-1);
1436                         Bprint(faction, " break;");
1437                         if((t=gettok()) == IDENTIFIER) {
1438
1439                                 /* action within rule... */
1440                                 sprint(actnm, "$$%d", nprod);
1441
1442                                 /* make it a nonterminal */
1443                                 j = chfind(1, actnm);
1444
1445                                 /*
1446                                  * the current rule will become rule number nprod+1
1447                                  * move the contents down, and make room for the null
1448                                  */
1449                                 for(p = mem; p >= prdptr[nprod]; --p)
1450                                         p[2] = *p;
1451                                 mem += 2;
1452
1453                                 /* enter null production for action */
1454                                 p = prdptr[nprod];
1455                                 *p++ = j;
1456                                 *p++ = -nprod;
1457
1458                                 /* update the production information */
1459                                 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1460                                 levprd[nprod] = ACTFLAG;
1461                                 if(++nprod >= NPROD)
1462                                         error("more than %d rules", NPROD);
1463                                 prdptr[nprod] = p;
1464
1465                                 /* make the action appear in the original rule */
1466                                 *mem++ = j;
1467
1468                                 /* get some more of the rule */
1469                                 goto more_rule;
1470                         }
1471                 }
1472
1473                 while(t == ';')
1474                         t = gettok();
1475                 *mem++ = -nprod;
1476
1477                 /* check that default action is reasonable */
1478                 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1479
1480                         /* no explicit action, LHS has value */
1481                         int tempty;
1482
1483                         tempty = prdptr[nprod][1];
1484                         if(tempty < 0)
1485                                 error("must return a value, since LHS has a type");
1486                         else
1487                                 if(tempty >= NTBASE)
1488                                         tempty = nontrst[tempty-NTBASE].value;
1489                                 else
1490                                         tempty = TYPE(toklev[tempty]);
1491                         if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1492                                 error("default action causes potential type clash");
1493                 }
1494                 nprod++;
1495                 if(nprod >= NPROD)
1496                         error("more than %d rules", NPROD);
1497                 prdptr[nprod] = mem;
1498                 levprd[nprod] = 0;
1499         }
1500
1501         /* end of all rules */
1502         defout(1);
1503
1504         finact();
1505         if(t == MARK) {
1506                 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1507                 while((c=Bgetrune(finput)) != Beof)
1508                         Bputrune(ftable, c);
1509         }
1510         Bterm(finput);
1511 }
1512
1513 /*
1514  * finish action routine
1515  */
1516 void
1517 finact(void)
1518 {
1519
1520         Bterm(faction);
1521         Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1522         Bprint(ftable, "#define YYERRCODE %d\n", 2);
1523 }
1524
1525 /*
1526  * define s to be a terminal if t=0
1527  * or a nonterminal if t=1
1528  */
1529 int
1530 defin(int nt, char *s)
1531 {
1532         int val;
1533         Rune rune;
1534
1535         val = 0;
1536         if(nt) {
1537                 nnonter++;
1538                 if(nnonter >= NNONTERM)
1539                         error("too many nonterminals, limit %d",NNONTERM);
1540                 nontrst[nnonter].name = cstash(s);
1541                 return NTBASE + nnonter;
1542         }
1543
1544         /* must be a token */
1545         ntokens++;
1546         if(ntokens >= NTERMS)
1547                 error("too many terminals, limit %d", NTERMS);
1548         tokset[ntokens].name = cstash(s);
1549
1550         /* establish value for token */
1551         /* single character literal */
1552         if(s[0] == ' ') {
1553                 val = chartorune(&rune, &s[1]);
1554                 if(s[val+1] == 0) {
1555                         val = rune;
1556                         goto out;
1557                 }
1558         }
1559
1560         /* escape sequence */
1561         if(s[0] == ' ' && s[1] == '\\') {
1562                 if(s[3] == 0) {
1563                         /* single character escape sequence */
1564                         switch(s[2]) {
1565                         case 'n':       val = '\n'; break;
1566                         case 'r':       val = '\r'; break;
1567                         case 'b':       val = '\b'; break;
1568                         case 't':       val = '\t'; break;
1569                         case 'f':       val = '\f'; break;
1570                         case '\'':      val = '\''; break;
1571                         case '"':       val = '"'; break;
1572                         case '\\':      val = '\\'; break;
1573                         default:        error("invalid escape");
1574                         }
1575                         goto out;
1576                 }
1577
1578                 /* \nnn sequence */
1579                 if(s[2] >= '0' && s[2] <= '7') {
1580                         if(s[3] < '0' ||
1581                            s[3] > '7' ||
1582                            s[4] < '0' ||
1583                            s[4] > '7' ||
1584                            s[5] != 0)
1585                                 error("illegal \\nnn construction");
1586                         val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1587                         if(val == 0)
1588                                 error("'\\000' is illegal");
1589                         goto out;
1590                 }
1591                 error("unknown escape");
1592         }
1593         val = extval++;
1594
1595 out:
1596         tokset[ntokens].value = val;
1597         toklev[ntokens] = 0;
1598         return ntokens;
1599 }
1600
1601 /*
1602  * write out the defines (at the end of the declaration section)
1603  */
1604 void
1605 defout(int last)
1606 {
1607         int i, c;
1608         char sar[NAMESIZE+10];
1609
1610         for(i=ndefout; i<=ntokens; i++) {
1611                 /* non-literals */
1612                 c = tokset[i].name[0];
1613                 if(c != ' ' && c != '$') {
1614                         Bprint(ftable, "#define %s      %d\n",
1615                                 tokset[i].name, tokset[i].value);
1616                         if(fdefine)
1617                                 Bprint(fdefine, "#define\t%s\t%d\n",
1618                                         tokset[i].name, tokset[i].value);
1619                 }
1620         }
1621         ndefout = ntokens+1;
1622         if(last && fdebug) {
1623                 Bprint(fdebug, "char*   yytoknames[] =\n{\n");
1624                 TLOOP(i) {
1625                         if(tokset[i].name) {
1626                                 chcopy(sar, tokset[i].name);
1627                                 Bprint(fdebug, "\t\"%s\",\n", sar);
1628                                 continue;
1629                         }
1630                         Bprint(fdebug, "\t0,\n");
1631                 }
1632                 Bprint(fdebug, "};\n");
1633         }
1634 }
1635
1636 char*
1637 cstash(char *s)
1638 {
1639         char *temp;
1640
1641         temp = cnamp;
1642         do {
1643                 if(cnamp >= &cnames[cnamsz])
1644                         error("too many characters in id's and literals");
1645                 else
1646                         *cnamp++ = *s;
1647         } while(*s++);
1648         return temp;
1649 }
1650
1651 long
1652 gettok(void)
1653 {
1654         long c;
1655         Rune rune;
1656         int i, base, match, reserve;
1657         static int peekline;
1658
1659 begin:
1660         reserve = 0;
1661         lineno += peekline;
1662         peekline = 0;
1663         c = Bgetrune(finput);
1664         while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1665                 if(c == '\n')
1666                         lineno++;
1667                 c = Bgetrune(finput);
1668         }
1669
1670         /* skip comment */
1671         if(c == '/') {
1672                 lineno += skipcom();
1673                 goto begin;
1674         }
1675         switch(c) {
1676         case Beof:
1677                 return ENDFILE;
1678
1679         case '{':
1680                 Bungetrune(finput);
1681                 return '=';
1682
1683         case '<':
1684                 /* get, and look up, a type name (union member name) */
1685                 i = 0;
1686                 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1687                         rune = c;
1688                         c = runetochar(&tokname[i], &rune);
1689                         if(i < NAMESIZE)
1690                                 i += c;
1691                 }
1692                 if(c != '>')
1693                         error("unterminated < ... > clause");
1694                 tokname[i] = 0;
1695                 for(i=1; i<=ntypes; i++)
1696                         if(!strcmp(typeset[i], tokname)) {
1697                                 numbval = i;
1698                                 return TYPENAME;
1699                         }
1700                 ntypes++;
1701                 numbval = ntypes;
1702                 typeset[numbval] = cstash(tokname);
1703                 return TYPENAME;
1704
1705         case '"':
1706         case '\'':
1707                 match = c;
1708                 tokname[0] = ' ';
1709                 i = 1;
1710                 for(;;) {
1711                         c = Bgetrune(finput);
1712                         if(c == '\n' || c <= 0)
1713                                 error("illegal or missing ' or \"" );
1714                         if(c == '\\') {
1715                                 tokname[i] = '\\';
1716                                 if(i < NAMESIZE)
1717                                         i++;
1718                                 c = Bgetrune(finput);
1719                         } else
1720                                 if(c == match)
1721                                         break;
1722                         rune = c;
1723                         c = runetochar(&tokname[i], &rune);
1724                         if(i < NAMESIZE)
1725                                 i += c;
1726                 }
1727                 break;
1728
1729         case '%':
1730         case '\\':
1731                 switch(c = Bgetrune(finput)) {
1732                 case '0':       return TERM;
1733                 case '<':       return LEFT;
1734                 case '2':       return BINARY;
1735                 case '>':       return RIGHT;
1736                 case '%':
1737                 case '\\':      return MARK;
1738                 case '=':       return PREC;
1739                 case '{':       return LCURLY;
1740                 default:        reserve = 1;
1741                 }
1742
1743         default:
1744                 /* number */
1745                 if(isdigit(c)) {
1746                         numbval = c-'0';
1747                         base = (c=='0')? 8: 10;
1748                         for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1749                                 numbval = numbval*base + (c-'0');
1750                         Bungetrune(finput);
1751                         return NUMBER;
1752                 }
1753                 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1754                         i = 0;
1755                         while(islower(c) || isupper(c) || isdigit(c) ||
1756                             c == '-' || c=='_' || c=='.' || c=='$') {
1757                                 if(reserve && isupper(c))
1758                                         c += 'a'-'A';
1759                                 rune = c;
1760                                 c = runetochar(&tokname[i], &rune);
1761                                 if(i < NAMESIZE)
1762                                         i += c;
1763                                 c = Bgetrune(finput);
1764                         }
1765                 } else
1766                         return c;
1767                 Bungetrune(finput);
1768         }
1769         tokname[i] = 0;
1770
1771         /* find a reserved word */
1772         if(reserve) {
1773                 for(c=0; resrv[c].name; c++)
1774                         if(strcmp(tokname, resrv[c].name) == 0)
1775                                 return resrv[c].value;
1776                 error("invalid escape, or illegal reserved word: %s", tokname);
1777         }
1778
1779         /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1780         c = Bgetrune(finput);
1781         while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1782                 if(c == '\n')
1783                         peekline++;
1784                 /* look for comments */
1785                 if(c == '/')
1786                         peekline += skipcom();
1787                 c = Bgetrune(finput);
1788         }
1789         if(c == ':')
1790                 return IDENTCOLON;
1791         Bungetrune(finput);
1792         return IDENTIFIER;
1793 }
1794
1795 /*
1796  * determine the type of a symbol
1797  */
1798 int
1799 fdtype(int t)
1800 {
1801         int v;
1802
1803         if(t >= NTBASE)
1804                 v = nontrst[t-NTBASE].value;
1805         else
1806                 v = TYPE(toklev[t]);
1807         if(v <= 0)
1808                 error("must specify type for %s", (t>=NTBASE)?
1809                         nontrst[t-NTBASE].name: tokset[t].name);
1810         return v;
1811 }
1812
1813 int
1814 chfind(int t, char *s)
1815 {
1816         int i;
1817
1818         if(s[0] == ' ')
1819                 t = 0;
1820         TLOOP(i)
1821                 if(!strcmp(s, tokset[i].name))
1822                         return i;
1823         NTLOOP(i)
1824                 if(!strcmp(s, nontrst[i].name))
1825                         return NTBASE+i;
1826
1827         /* cannot find name */
1828         if(t > 1)
1829                 error("%s should have been defined earlier", s);
1830         return defin(t, s);
1831 }
1832
1833 /*
1834  * copy the union declaration to the output, and the define file if present
1835  */
1836 void
1837 cpyunion(void)
1838 {
1839         long c;
1840         int level;
1841
1842         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1843         Bprint(ftable, "typedef union ");
1844         if(fdefine != 0)
1845                 Bprint(fdefine, "\ntypedef union ");
1846
1847         level = 0;
1848         for(;;) {
1849                 if((c=Bgetrune(finput)) == Beof)
1850                         error("EOF encountered while processing %%union");
1851                 Bputrune(ftable, c);
1852                 if(fdefine != 0)
1853                         Bputrune(fdefine, c);
1854                 switch(c) {
1855                 case '\n':
1856                         lineno++;
1857                         break;
1858                 case '{':
1859                         level++;
1860                         break;
1861                 case '}':
1862                         level--;
1863
1864                         /* we are finished copying */
1865                         if(level == 0) {
1866                                 Bprint(ftable, " YYSTYPE;\n");
1867                                 if(fdefine != 0)
1868                                         Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1869                                 return;
1870                         }
1871                 }
1872         }
1873 }
1874
1875 /*
1876  * copies code between \{ and \}
1877  */
1878 void
1879 cpycode(void)
1880 {
1881
1882         long c;
1883
1884         c = Bgetrune(finput);
1885         if(c == '\n') {
1886                 c = Bgetrune(finput);
1887                 lineno++;
1888         }
1889         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1890         while(c != Beof) {
1891                 if(c == '\\') {
1892                         if((c=Bgetrune(finput)) == '}')
1893                                 return;
1894                         Bputc(ftable, '\\');
1895                 }
1896                 if(c == '%') {
1897                         if((c=Bgetrune(finput)) == '}')
1898                                 return;
1899                         Bputc(ftable, '%');
1900                 }
1901                 Bputrune(ftable, c);
1902                 if(c == '\n')
1903                         lineno++;
1904                 c = Bgetrune(finput);
1905         }
1906         error("eof before %%}");
1907 }
1908
1909 /*
1910  * skip over comments
1911  * skipcom is called after reading a '/'
1912  */
1913 int
1914 skipcom(void)
1915 {
1916         long c;
1917         int i;
1918
1919         /* i is the number of lines skipped */
1920         i = 0;
1921         c = Bgetrune(finput);
1922         if(c == '/'){                   /* C++ //: skip to end of line */
1923                 while((c = Bgetrune(finput)) != Beof)
1924                         if(c == '\n')
1925                                 return 1;
1926         }else if(c == '*'){             /* normal C comment */
1927                 while((c = Bgetrune(finput)) != Beof) {
1928                         while(c == '*')
1929                                 if((c = Bgetrune(finput)) == '/')
1930                                         return i;
1931                         if(c == '\n')
1932                                 i++;
1933                 }
1934         }else
1935                 error("illegal comment");
1936
1937         error("EOF inside comment");
1938         return 0;
1939 }
1940
1941 /*
1942  * copy C action to the next ; or closing }
1943  */
1944 void
1945 cpyact(int offset)
1946 {
1947         long c;
1948         int brac, match, j, s, fnd, tok;
1949
1950         Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1951         brac = 0;
1952
1953 loop:
1954         c = Bgetrune(finput);
1955 swt:
1956         switch(c) {
1957         case ';':
1958                 if(brac == 0) {
1959                         Bputrune(faction, c);
1960                         return;
1961                 }
1962                 goto lcopy;
1963
1964         case '{':
1965                 brac++;
1966                 goto lcopy;
1967
1968         case '$':
1969                 s = 1;
1970                 tok = -1;
1971                 c = Bgetrune(finput);
1972
1973                 /* type description */
1974                 if(c == '<') {
1975                         Bungetrune(finput);
1976                         if(gettok() != TYPENAME)
1977                                 error("bad syntax on $<ident> clause");
1978                         tok = numbval;
1979                         c = Bgetrune(finput);
1980                 }
1981                 if(c == '$') {
1982                         Bprint(faction, "yyval");
1983
1984                         /* put out the proper tag... */
1985                         if(ntypes) {
1986                                 if(tok < 0)
1987                                         tok = fdtype(*prdptr[nprod]);
1988                                 Bprint(faction, ".%s", typeset[tok]);
1989                         }
1990                         goto loop;
1991                 }
1992                 if(c == '-') {
1993                         s = -s;
1994                         c = Bgetrune(finput);
1995                 }
1996                 if(isdigit(c)) {
1997                         j = 0;
1998                         while(isdigit(c)) {
1999                                 j = j*10 + (c-'0');
2000                                 c = Bgetrune(finput);
2001                         }
2002                         Bungetrune(finput);
2003                         j = j*s - offset;
2004                         if(j > 0)
2005                                 error("Illegal use of $%d", j+offset);
2006
2007                 dollar:
2008                         Bprint(faction, "yypt[-%d].yyv", -j);
2009
2010                         /* put out the proper tag */
2011                         if(ntypes) {
2012                                 if(j+offset <= 0 && tok < 0)
2013                                         error("must specify type of $%d", j+offset);
2014                                 if(tok < 0)
2015                                         tok = fdtype(prdptr[nprod][j+offset]);
2016                                 Bprint(faction, ".%s", typeset[tok]);
2017                         }
2018                         goto loop;
2019                 }
2020                 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2021                         int tok; /* tok used oustide for type info */
2022
2023                         /* look for $name */
2024                         Bungetrune(finput);
2025                         if(gettok() != IDENTIFIER)
2026                                 error("$ must be followed by an identifier");
2027                         tok = chfind(2, tokname);
2028                         if((c = Bgetrune(finput)) != '#') {
2029                                 Bungetrune(finput);
2030                                 fnd = -1;
2031                         } else
2032                                 if(gettok() != NUMBER) {
2033                                         error("# must be followed by number");
2034                                         fnd = -1;
2035                                 } else
2036                                         fnd = numbval;
2037                         for(j=1; j<=offset; ++j)
2038                                 if(tok == prdptr[nprod][j]) {
2039                                         if(--fnd <= 0) {
2040                                                 j -= offset;
2041                                                 goto dollar;
2042                                         }
2043                                 }
2044                         error("$name or $name#number not found");
2045                 }
2046                 Bputc(faction, '$');
2047                 if(s < 0 )
2048                         Bputc(faction, '-');
2049                 goto swt;
2050
2051         case '}':
2052                 brac--;
2053                 if(brac)
2054                         goto lcopy;
2055                 Bputrune(faction, c);
2056                 return;
2057
2058         case '/':
2059                 /* look for comments */
2060                 Bputrune(faction, c);
2061                 c = Bgetrune(finput);
2062                 if(c != '*')
2063                         goto swt;
2064
2065                 /* it really is a comment */
2066                 Bputrune(faction, c);
2067                 c = Bgetrune(finput);
2068                 while(c >= 0) {
2069                         while(c == '*') {
2070                                 Bputrune(faction, c);
2071                                 if((c=Bgetrune(finput)) == '/')
2072                                         goto lcopy;
2073                         }
2074                         Bputrune(faction, c);
2075                         if(c == '\n')
2076                                 lineno++;
2077                         c = Bgetrune(finput);
2078                 }
2079                 error("EOF inside comment");
2080
2081         case '\'':
2082                 /* character constant */
2083                 match = '\'';
2084                 goto string;
2085
2086         case '"':
2087                 /* character string */
2088                 match = '"';
2089
2090         string:
2091                 Bputrune(faction, c);
2092                 while(c = Bgetrune(finput)) {
2093                         if(c == '\\') {
2094                                 Bputrune(faction, c);
2095                                 c = Bgetrune(finput);
2096                                 if(c == '\n')
2097                                         lineno++;
2098                         } else
2099                                 if(c == match)
2100                                         goto lcopy;
2101                                 if(c == '\n')
2102                                         error("newline in string or char. const.");
2103                         Bputrune(faction, c);
2104                 }
2105                 error("EOF in string or character constant");
2106
2107         case Beof:
2108                 error("action does not terminate");
2109
2110         case '\n':
2111                 lineno++;
2112                 goto lcopy;
2113         }
2114
2115 lcopy:
2116         Bputrune(faction, c);
2117         goto loop;
2118 }
2119
2120 void
2121 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2122 {
2123         char buf[256];
2124
2125         if(vflag) {
2126                 snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2127                 foutput = Bopen(buf, OWRITE);
2128                 if(foutput == 0)
2129                         error("cannot open %s", buf);
2130         }
2131         if(yydebug) {
2132                 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2133                 if((fdebug = Bopen(buf, OWRITE)) == 0)
2134                         error("can't open %s", buf);
2135         }
2136         if(dflag) {
2137                 snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2138                 fdefine = Bopen(buf, OWRITE);
2139                 if(fdefine == 0)
2140                         error("can't create %s", buf);
2141         }
2142         if(ytab == 0)
2143                 snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2144         else
2145                 strecpy(buf, buf+sizeof buf, ytabc);
2146         ftable = Bopen(buf, OWRITE);
2147         if(ftable == 0)
2148                 error("cannot open table file %s", buf);
2149 }
2150
2151 /*
2152  * print the output for the states
2153  */
2154 void
2155 output(void)
2156 {
2157         int i, k, c;
2158         Wset *u, *v;
2159
2160         Bprint(ftable, "short   yyexca[] =\n{");
2161         if(fdebug)
2162                 Bprint(fdebug, "char*   yystates[] =\n{\n");
2163
2164         /* output the stuff for state i */
2165         SLOOP(i) {
2166                 nolook = tystate[i]!=MUSTLOOKAHEAD;
2167                 closure(i);
2168
2169                 /* output actions */
2170                 nolook = 1;
2171                 aryfil(temp1, ntokens+nnonter+1, 0);
2172                 WSLOOP(wsets, u) {
2173                         c = *(u->pitem);
2174                         if(c > 1 && c < NTBASE && temp1[c] == 0) {
2175                                 WSLOOP(u, v)
2176                                         if(c == *(v->pitem))
2177                                                 putitem(v->pitem+1, (Lkset*)0);
2178                                 temp1[c] = state(c);
2179                         } else
2180                                 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2181                                         temp1[c+ntokens] = amem[indgo[i]+c];
2182                 }
2183                 if(i == 1)
2184                         temp1[1] = ACCEPTCODE;
2185
2186                 /* now, we have the shifts; look at the reductions */
2187                 lastred = 0;
2188                 WSLOOP(wsets, u) {
2189                         c = *u->pitem;
2190
2191                         /* reduction */
2192                         if(c <= 0) {
2193                                 lastred = -c;
2194                                 TLOOP(k)
2195                                         if(BIT(u->ws.lset, k)) {
2196                                                 if(temp1[k] == 0)
2197                                                         temp1[k] = c;
2198                                                 else
2199                                                 if(temp1[k] < 0) { /* reduce/reduce conflict */
2200                                                         if(foutput)
2201                                                                 Bprint(foutput,
2202                                                                         "\n%d: reduce/reduce conflict"
2203                                                                         " (red'ns %d and %d ) on %s",
2204                                                                         i, -temp1[k], lastred,
2205                                                                         symnam(k));
2206                                                         if(-temp1[k] > lastred)
2207                                                                 temp1[k] = -lastred;
2208                                                         zzrrconf++;
2209                                                 } else
2210                                                         /* potential shift/reduce conflict */
2211                                                         precftn( lastred, k, i );
2212                                         }
2213                                 }
2214                 }
2215                 wract(i);
2216         }
2217
2218         if(fdebug)
2219                 Bprint(fdebug, "};\n");
2220         Bprint(ftable, "};\n");
2221         Bprint(ftable, "#define YYNPROD %d\n", nprod);
2222         Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2223         if(yydebug)
2224                 Bprint(ftable, "#define yydebug %s\n", yydebug);
2225 }
2226
2227 /*
2228  * pack state i from temp1 into amem
2229  */
2230 int
2231 apack(int *p, int n)
2232 {
2233         int *pp, *qq, *rr, off, *q, *r;
2234
2235         /* we don't need to worry about checking because
2236          * we will only look at entries known to be there...
2237          * eliminate leading and trailing 0's
2238          */
2239
2240         q = p+n;
2241         for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2242                 ;
2243         /* no actions */
2244         if(pp > q)
2245                 return 0;
2246         p = pp;
2247
2248         /* now, find a place for the elements from p to q, inclusive */
2249         r = &amem[ACTSIZE-1];
2250         for(rr = amem; rr <= r; rr++, off++) {
2251                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2252                         if(*pp != 0)
2253                                 if(*pp != *qq && *qq != 0)
2254                                         goto nextk;
2255
2256                 /* we have found an acceptable k */
2257                 if(pkdebug && foutput != 0)
2258                         Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2259                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2260                         if(*pp) {
2261                                 if(qq > r)
2262                                         error("action table overflow");
2263                                 if(qq > memp)
2264                                         memp = qq;
2265                                 *qq = *pp;
2266                         }
2267                 if(pkdebug && foutput != 0)
2268                         for(pp = amem; pp <= memp; pp += 10) {
2269                                 Bprint(foutput, "\t");
2270                                 for(qq = pp; qq <= pp+9; qq++)
2271                                         Bprint(foutput, "%d ", *qq);
2272                                 Bprint(foutput, "\n");
2273                         }
2274                 return(off);
2275         nextk:;
2276         }
2277         error("no space in action table");
2278         return 0;
2279 }
2280
2281 /*
2282  * output the gotos for the nontermninals
2283  */
2284 void
2285 go2out(void)
2286 {
2287         int i, j, k, best, count, cbest, times;
2288
2289         /* mark begining of gotos */
2290         Bprint(ftemp, "$\n");
2291         for(i = 1; i <= nnonter; i++) {
2292                 go2gen(i);
2293
2294                 /* find the best one to make default */
2295                 best = -1;
2296                 times = 0;
2297
2298                 /* is j the most frequent */
2299                 for(j = 0; j <= nstate; j++) {
2300                         if(tystate[j] == 0)
2301                                 continue;
2302                         if(tystate[j] == best)
2303                                 continue;
2304
2305                         /* is tystate[j] the most frequent */
2306                         count = 0;
2307                         cbest = tystate[j];
2308                         for(k = j; k <= nstate; k++)
2309                                 if(tystate[k] == cbest)
2310                                         count++;
2311                         if(count > times) {
2312                                 best = cbest;
2313                                 times = count;
2314                         }
2315                 }
2316
2317                 /* best is now the default entry */
2318                 zzgobest += times-1;
2319                 for(j = 0; j <= nstate; j++)
2320                         if(tystate[j] != 0 && tystate[j] != best) {
2321                                 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2322                                 zzgoent++;
2323                         }
2324
2325                 /* now, the default */
2326                 if(best == -1)
2327                         best = 0;
2328                 zzgoent++;
2329                 Bprint(ftemp, "%d\n", best);
2330         }
2331 }
2332
2333 /*
2334  * output the gotos for nonterminal c
2335  */
2336 void
2337 go2gen(int c)
2338 {
2339         int i, work, cc;
2340         Item *p, *q;
2341
2342
2343         /* first, find nonterminals with gotos on c */
2344         aryfil(temp1, nnonter+1, 0);
2345         temp1[c] = 1;
2346         work = 1;
2347         while(work) {
2348                 work = 0;
2349                 PLOOP(0, i)
2350
2351                         /* cc is a nonterminal */
2352                         if((cc=prdptr[i][1]-NTBASE) >= 0)
2353                                 /* cc has a goto on c */
2354                                 if(temp1[cc] != 0) {
2355
2356                                         /* thus, the left side of production i does too */
2357                                         cc = *prdptr[i]-NTBASE;
2358                                         if(temp1[cc] == 0) {
2359                                                   work = 1;
2360                                                   temp1[cc] = 1;
2361                                         }
2362                                 }
2363         }
2364
2365         /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2366         if(g2debug && foutput != 0) {
2367                 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2368                 NTLOOP(i)
2369                         if(temp1[i])
2370                                 Bprint(foutput, "%s ", nontrst[i].name);
2371                 Bprint(foutput, "\n");
2372         }
2373
2374         /* now, go through and put gotos into tystate */
2375         aryfil(tystate, nstate, 0);
2376         SLOOP(i)
2377                 ITMLOOP(i, p, q)
2378                         if((cc = *p->pitem) >= NTBASE)
2379                                 /* goto on c is possible */
2380                                 if(temp1[cc-NTBASE]) {
2381                                         tystate[i] = amem[indgo[i]+c];
2382                                         break;
2383                                 }
2384 }
2385
2386 /*
2387  * decide a shift/reduce conflict by precedence.
2388  * r is a rule number, t a token number
2389  * the conflict is in state s
2390  * temp1[t] is changed to reflect the action
2391  */
2392 void
2393 precftn(int r, int t, int s)
2394 {
2395         int lp, lt, action;
2396
2397         lp = levprd[r];
2398         lt = toklev[t];
2399         if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2400
2401                 /* conflict */
2402                 if(foutput != 0)
2403                         Bprint(foutput,
2404                                 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2405                                 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2406                 zzsrconf++;
2407                 return;
2408         }
2409         if(PLEVEL(lt) == PLEVEL(lp))
2410                 action = ASSOC(lt);
2411         else
2412                 if(PLEVEL(lt) > PLEVEL(lp))
2413                         action = RASC;  /* shift */
2414                 else
2415                         action = LASC;  /* reduce */
2416         switch(action) {
2417         case BASC:  /* error action */
2418                 temp1[t] = ERRCODE;
2419                 break;
2420         case LASC:  /* reduce */
2421                 temp1[t] = -r;
2422                 break;
2423         }
2424 }
2425
2426 /*
2427  * output state i
2428  * temp1 has the actions, lastred the default
2429  */
2430 void
2431 wract(int i)
2432 {
2433         int p, p0, p1, ntimes, tred, count, j, flag;
2434
2435         /* find the best choice for lastred */
2436         lastred = 0;
2437         ntimes = 0;
2438         TLOOP(j) {
2439                 if(temp1[j] >= 0)
2440                         continue;
2441                 if(temp1[j]+lastred == 0)
2442                         continue;
2443                 /* count the number of appearances of temp1[j] */
2444                 count = 0;
2445                 tred = -temp1[j];
2446                 levprd[tred] |= REDFLAG;
2447                 TLOOP(p)
2448                         if(temp1[p]+tred == 0)
2449                                 count++;
2450                 if(count > ntimes) {
2451                         lastred = tred;
2452                         ntimes = count;
2453                 }
2454         }
2455
2456         /*
2457          * for error recovery, arrange that, if there is a shift on the
2458          * error recovery token, `error', that the default be the error action
2459          */
2460         if(temp1[2] > 0)
2461                 lastred = 0;
2462
2463         /* clear out entries in temp1 which equal lastred */
2464         TLOOP(p)
2465                 if(temp1[p]+lastred == 0)
2466                         temp1[p] = 0;
2467
2468         wrstate(i);
2469         defact[i] = lastred;
2470         flag = 0;
2471         TLOOP(p0)
2472                 if((p1=temp1[p0]) != 0) {
2473                         if(p1 < 0) {
2474                                 p1 = -p1;
2475                                 goto exc;
2476                         }
2477                         if(p1 == ACCEPTCODE) {
2478                                 p1 = -1;
2479                                 goto exc;
2480                         }
2481                         if(p1 == ERRCODE) {
2482                                 p1 = 0;
2483                         exc:
2484                                 if(flag++ == 0)
2485                                         Bprint(ftable, "-1, %d,\n", i);
2486                                 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2487                                 zzexcp++;
2488                                 continue;
2489                         }
2490                         Bprint(ftemp, "%d,%d,", p0, p1);
2491                         zzacent++;
2492                 }
2493         if(flag) {
2494                 defact[i] = -2;
2495                 Bprint(ftable, "\t-2, %d,\n", lastred);
2496         }
2497         Bprint(ftemp, "\n");
2498 }
2499
2500 /*
2501  * writes state i
2502  */
2503 void
2504 wrstate(int i)
2505 {
2506         int j0, j1;
2507         Item *pp, *qq;
2508         Wset *u;
2509
2510         if(fdebug) {
2511                 if(lastred) {
2512                         Bprint(fdebug, "        0, /*%d*/\n", i);
2513                 } else {
2514                         Bprint(fdebug, "        \"");
2515                         ITMLOOP(i, pp, qq)
2516                                 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2517                         if(tystate[i] == MUSTLOOKAHEAD)
2518                                 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2519                                         if(*u->pitem < 0)
2520                                                 Bprint(fdebug, "%s\\n", writem(u->pitem));
2521                         Bprint(fdebug, "\", /*%d*/\n", i);
2522                 }
2523         }
2524         if(foutput == 0)
2525                 return;
2526         Bprint(foutput, "\nstate %d\n", i);
2527         ITMLOOP(i, pp, qq)
2528                 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2529         if(tystate[i] == MUSTLOOKAHEAD)
2530                 /* print out empty productions in closure */
2531                 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2532                         if(*u->pitem < 0)
2533                                 Bprint(foutput, "\t%s\n", writem(u->pitem));
2534
2535         /* check for state equal to another */
2536         TLOOP(j0)
2537                 if((j1=temp1[j0]) != 0) {
2538                         Bprint(foutput, "\n\t%s  ", symnam(j0));
2539                         /* shift, error, or accept */
2540                         if(j1 > 0) {
2541                                 if(j1 == ACCEPTCODE)
2542                                         Bprint(foutput,  "accept");
2543                                 else
2544                                         if(j1 == ERRCODE)
2545                                                 Bprint(foutput, "error");
2546                                         else
2547                                                 Bprint(foutput, "shift %d", j1);
2548                         } else
2549                                 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2550                 }
2551
2552         /* output the final production */
2553         if(lastred)
2554                 Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2555                         lastred, rlines[lastred]);
2556         else
2557                 Bprint(foutput, "\n\t.  error\n\n");
2558
2559         /* now, output nonterminal actions */
2560         j1 = ntokens;
2561         for(j0 = 1; j0 <= nnonter; j0++) {
2562                 j1++;
2563                 if(temp1[j1])
2564                         Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2565         }
2566 }
2567
2568 void
2569 warray(char *s, int *v, int n)
2570 {
2571         int i;
2572
2573         Bprint(ftable, "short   %s[] =\n{", s);
2574         for(i=0;;) {
2575                 if(i%10 == 0)
2576                         Bprint(ftable, "\n");
2577                 Bprint(ftable, "%4d", v[i]);
2578                 i++;
2579                 if(i >= n) {
2580                         Bprint(ftable, "\n};\n");
2581                         break;
2582                 }
2583                 Bprint(ftable, ",");
2584         }
2585 }
2586
2587 /*
2588  * in order to free up the mem and amem arrays for the optimizer,
2589  * and still be able to output yyr1, etc., after the sizes of
2590  * the action array is known, we hide the nonterminals
2591  * derived by productions in levprd.
2592  */
2593
2594 void
2595 hideprod(void)
2596 {
2597         int i, j;
2598
2599         j = 0;
2600         levprd[0] = 0;
2601         PLOOP(1, i) {
2602                 if(!(levprd[i] & REDFLAG)) {
2603                         j++;
2604                         if(foutput != 0)
2605                                 Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2606                 }
2607                 levprd[i] = *prdptr[i] - NTBASE;
2608         }
2609         if(j)
2610                 print("%d rules never reduced\n", j);
2611 }
2612
2613 void
2614 callopt(void)
2615 {
2616         int i, *p, j, k, *q;
2617
2618         /* read the arrays from tempfile and set parameters */
2619         finput = Bopen(tempname, OREAD);
2620         if(finput == 0)
2621                 error("optimizer cannot open tempfile");
2622
2623         pgo[0] = 0;
2624         temp1[0] = 0;
2625         nstate = 0;
2626         nnonter = 0;
2627         for(;;) {
2628                 switch(gtnm()) {
2629                 case '\n':
2630                         nstate++;
2631                         pmem--;
2632                         temp1[nstate] = pmem - mem0;
2633                 case ',':
2634                         continue;
2635                 case '$':
2636                         break;
2637                 default:
2638                         error("bad tempfile");
2639                 }
2640                 break;
2641         }
2642
2643         pmem--;
2644         temp1[nstate] = yypgo[0] = pmem - mem0;
2645         for(;;) {
2646                 switch(gtnm()) {
2647                 case '\n':
2648                         nnonter++;
2649                         yypgo[nnonter] = pmem-mem0;
2650                 case ',':
2651                         continue;
2652                 case -1:
2653                         break;
2654                 default:
2655                         error("bad tempfile");
2656                 }
2657                 break;
2658         }
2659         pmem--;
2660         yypgo[nnonter--] = pmem - mem0;
2661         for(i = 0; i < nstate; i++) {
2662                 k = 32000;
2663                 j = 0;
2664                 q = mem0 + temp1[i+1];
2665                 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2666                         if(*p > j)
2667                                 j = *p;
2668                         if(*p < k)
2669                                 k = *p;
2670                 }
2671                 /* nontrivial situation */
2672                 if(k <= j) {
2673                         /* j is now the range */
2674 /*                      j -= k;                 /* call scj */
2675                         if(k > maxoff)
2676                                 maxoff = k;
2677                 }
2678                 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2679                 if(j > maxspr)
2680                         maxspr = j;
2681         }
2682
2683         /* initialize ggreed table */
2684         for(i = 1; i <= nnonter; i++) {
2685                 ggreed[i] = 1;
2686                 j = 0;
2687
2688                 /* minimum entry index is always 0 */
2689                 q = mem0 + yypgo[i+1] - 1;
2690                 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2691                         ggreed[i] += 2;
2692                         if(*p > j)
2693                                 j = *p;
2694                 }
2695                 ggreed[i] = ggreed[i] + 2*j;
2696                 if(j > maxoff)
2697                         maxoff = j;
2698         }
2699
2700         /* now, prepare to put the shift actions into the amem array */
2701         for(i = 0; i < ACTSIZE; i++)
2702                 amem[i] = 0;
2703         maxa = amem;
2704         for(i = 0; i < nstate; i++) {
2705                 if(tystate[i] == 0 && adb > 1)
2706                         Bprint(ftable, "State %d: null\n", i);
2707                 indgo[i] = YYFLAG1;
2708         }
2709         while((i = nxti()) != NOMORE)
2710                 if(i >= 0)
2711                         stin(i);
2712                 else
2713                         gin(-i);
2714
2715         /* print amem array */
2716         if(adb > 2 )
2717                 for(p = amem; p <= maxa; p += 10) {
2718                         Bprint(ftable, "%4d  ", (int)(p-amem));
2719                         for(i = 0; i < 10; ++i)
2720                                 Bprint(ftable, "%4d  ", p[i]);
2721                         Bprint(ftable, "\n");
2722                 }
2723
2724         /* write out the output appropriate to the language */
2725         aoutput();
2726         osummary();
2727         ZAPFILE(tempname);
2728 }
2729
2730 void
2731 gin(int i)
2732 {
2733         int *p, *r, *s, *q1, *q2;
2734
2735         /* enter gotos on nonterminal i into array amem */
2736         ggreed[i] = 0;
2737
2738         q2 = mem0+ yypgo[i+1] - 1;
2739         q1 = mem0 + yypgo[i];
2740
2741         /* now, find amem place for it */
2742         for(p = amem; p < &amem[ACTSIZE]; p++) {
2743                 if(*p)
2744                         continue;
2745                 for(r = q1; r < q2; r += 2) {
2746                         s = p + *r + 1;
2747                         if(*s)
2748                                 goto nextgp;
2749                         if(s > maxa)
2750                                 if((maxa = s) > &amem[ACTSIZE])
2751                                         error("a array overflow");
2752                 }
2753                 /* we have found amem spot */
2754                 *p = *q2;
2755                 if(p > maxa)
2756                         if((maxa = p) > &amem[ACTSIZE])
2757                                 error("a array overflow");
2758                 for(r = q1; r < q2; r += 2) {
2759                         s = p + *r + 1;
2760                         *s = r[1];
2761                 }
2762                 pgo[i] = p-amem;
2763                 if(adb > 1)
2764                         Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2765                 return;
2766
2767         nextgp:;
2768         }
2769         error("cannot place goto %d\n", i);
2770 }
2771
2772 void
2773 stin(int i)
2774 {
2775         int *r, *s, n, flag, j, *q1, *q2;
2776
2777         tystate[i] = 0;
2778
2779         /* enter state i into the amem array */
2780         q2 = mem0+temp1[i+1];
2781         q1 = mem0+temp1[i];
2782         /* find an acceptable place */
2783         for(n = -maxoff; n < ACTSIZE; n++) {
2784                 flag = 0;
2785                 for(r = q1; r < q2; r += 2) {
2786                         if((s = *r + n + amem) < amem)
2787                                 goto nextn;
2788                         if(*s == 0)
2789                                 flag++;
2790                         else
2791                                 if(*s != r[1])
2792                                         goto nextn;
2793                 }
2794
2795                 /* check that the position equals another only if the states are identical */
2796                 for(j=0; j<nstate; j++) {
2797                         if(indgo[j] == n) {
2798
2799                                 /* we have some disagreement */
2800                                 if(flag)
2801                                         goto nextn;
2802                                 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2803
2804                                         /* states are equal */
2805                                         indgo[i] = n;
2806                                         if(adb > 1)
2807                                                 Bprint(ftable,
2808                                                 "State %d: entry at %d equals state %d\n",
2809                                                 i, n, j);
2810                                         return;
2811                                 }
2812
2813                                 /* we have some disagreement */
2814                                 goto nextn;
2815                         }
2816                 }
2817
2818                 for(r = q1; r < q2; r += 2) {
2819                         if((s = *r+n+amem) >= &amem[ACTSIZE])
2820                                 error("out of space in optimizer a array");
2821                         if(s > maxa)
2822                                 maxa = s;
2823                         if(*s != 0 && *s != r[1])
2824                                 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2825                         *s = r[1];
2826                 }
2827                 indgo[i] = n;
2828                 if(adb > 1)
2829                         Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2830                 return;
2831         nextn:;
2832         }
2833         error("Error; failure to place state %d\n", i);
2834 }
2835
2836 /*
2837  * finds the next i
2838  */
2839 int
2840 nxti(void)
2841 {
2842         int i, max, maxi;
2843
2844         max = 0;
2845         maxi = 0;
2846         for(i = 1; i <= nnonter; i++)
2847                 if(ggreed[i] >= max) {
2848                         max = ggreed[i];
2849                         maxi = -i;
2850                 }
2851         for(i = 0; i < nstate; ++i)
2852                 if(tystate[i] >= max) {
2853                         max = tystate[i];
2854                         maxi = i;
2855                 }
2856         if(nxdb)
2857                 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2858         if(max == 0)
2859                 return NOMORE;
2860         return maxi;
2861 }
2862
2863 /*
2864  * write summary
2865  */
2866 void
2867 osummary(void)
2868 {
2869
2870         int i, *p;
2871
2872         if(foutput == 0)
2873                 return;
2874         i = 0;
2875         for(p = maxa; p >= amem; p--)
2876                 if(*p == 0)
2877                         i++;
2878
2879         Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2880                 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2881         Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2882         Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2883 }
2884
2885 /*
2886  * this version is for C
2887  * write out the optimized parser
2888  */
2889 void
2890 aoutput(void)
2891 {
2892         Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2893         arout("yyact", amem, (maxa-amem)+1);
2894         arout("yypact", indgo, nstate);
2895         arout("yypgo", pgo, nnonter+1);
2896 }
2897
2898 void
2899 arout(char *s, int *v, int n)
2900 {
2901         int i;
2902
2903         Bprint(ftable, "short   %s[] =\n{", s);
2904         for(i = 0; i < n;) {
2905                 if(i%10 == 0)
2906                         Bprint(ftable, "\n");
2907                 Bprint(ftable, "%4d", v[i]);
2908                 i++;
2909                 if(i == n)
2910                         Bprint(ftable, "\n};\n");
2911                 else
2912                         Bprint(ftable, ",");
2913         }
2914 }
2915
2916 /*
2917  * read and convert an integer from the standard input
2918  * return the terminating character
2919  * blanks, tabs, and newlines are ignored
2920  */
2921 int
2922 gtnm(void)
2923 {
2924         int sign, val, c;
2925
2926         sign = 0;
2927         val = 0;
2928         while((c=Bgetrune(finput)) != Beof) {
2929                 if(isdigit(c)) {
2930                         val = val*10 + c-'0';
2931                         continue;
2932                 }
2933                 if(c == '-') {
2934                         sign = 1;
2935                         continue;
2936                 }
2937                 break;
2938         }
2939         if(sign)
2940                 val = -val;
2941         *pmem++ = val;
2942         if(pmem >= &mem0[MEMSIZE])
2943                 error("out of space");
2944         return c;
2945 }