]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/yacc.c
482823ea092d04b0c40d87ef397ea47ce3396da4
[plan9front.git] / sys / src / cmd / yacc.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5
6 #define Bungetrune      Bungetc         /* ok for now. */
7
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET         ((32+NTERMS)/32)        /* BOTCH?? +31 */
12 #define BIT(a,i)        ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)     ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)       (((n)+32)/32)
15
16 #define PARSER          "/sys/lib/yaccpar"
17 #define PARSERS         "/sys/lib/yaccpars"
18 #define TEMPNAME        "y.tmp.XXXXXX"
19 #define ACTNAME         "y.acts.XXXXXX"
20 #define OFILE           "tab.c"
21 #define FILEU           "output"
22 #define FILED           "tab.h"
23 #define FILEDEBUG       "debug"
24
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31         ACTSIZE         = 40000,
32         MEMSIZE         = 40000,
33         NSTATES         = 2000,
34         NTERMS          = 511,
35         NPROD           = 1600,
36         NNONTERM        = 600,
37         TEMPSIZE        = 2000,
38         CNAMSZ          = 10000,
39         LSETSIZE        = 2400,
40         WSETSIZE        = 350,
41
42         NAMESIZE        = 50,
43         NTYPES          = 63,
44         ISIZE           = 400,
45
46         PRIVATE         = 0xE000,       /* unicode private use */
47
48         /* relationships which must hold:
49                 TBITSET ints must hold NTERMS+1 bits...
50                 WSETSIZE >= NNONTERM
51                 LSETSIZE >= NNONTERM
52                 TEMPSIZE >= NTERMS + NNONTERM + 1
53                 TEMPSIZE >= NSTATES
54         */
55
56         NTBASE          = 010000,
57         ERRCODE         = 8190,
58         ACCEPTCODE      = 8191,
59
60         NOASC           = 0,    /* no assoc. */
61         LASC            = 1,    /* left assoc. */
62         RASC            = 2,    /* right assoc. */
63         BASC            = 3,    /* binary assoc. */
64
65         /* flags for state generation */
66
67         DONE            = 0,
68         MUSTDO          = 1,
69         MUSTLOOKAHEAD   = 2,
70
71         /* flags for a rule having an action, and being reduced */
72
73         ACTFLAG         = 04,
74         REDFLAG         = 010,
75
76         /* output parser flags */
77         YYFLAG1         = -1000,
78
79         /* parse tokens */
80         IDENTIFIER      = PRIVATE,
81         MARK,
82         TERM,
83         LEFT,
84         RIGHT,
85         BINARY,
86         PREC,
87         LCURLY,
88         IDENTCOLON,
89         NUMBER,
90         START,
91         TYPEDEF,
92         TYPENAME,
93         UNION,
94
95         ENDFILE         = 0,
96
97         EMPTY           = 1,
98         WHOKNOWS        = 0,
99         OK              = 1,
100         NOMORE          = -1000,
101 };
102
103         /* macros for getting associativity and precedence levels */
104
105 #define ASSOC(i)        ((i)&03)
106 #define PLEVEL(i)       (((i)>>4)&077)
107 #define TYPE(i)         (((i)>>10)&077)
108
109         /* macros for setting associativity and precedence levels */
110
111 #define SETASC(i,j)     i |= j
112 #define SETPLEV(i,j)    i |= (j<<4)
113 #define SETTYPE(i,j)    i |= (j<<10)
114
115         /* looping macros */
116
117 #define TLOOP(i)        for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)       for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)      for(i=s; i<nprod; i++)
120 #define SLOOP(i)        for(i=0; i<nstate; i++)
121 #define WSBUMP(x)       x++
122 #define WSLOOP(s,j)     for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)  for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)      for(i=0; i<tbitset; i++)
125
126         /* command to clobber tempfiles after use */
127
128 #define ZAPFILE(x)      if(x) remove(x)
129
130         /* I/O descriptors */
131
132 Biobuf* faction;        /* file for saving actions */
133 Biobuf* fdefine;        /* file for #defines */
134 Biobuf* fdebug;         /* y.debug for strings for debugging */
135 Biobuf* ftable;         /* y.tab.c file */
136 Biobuf* ftemp;          /* tempfile to pass 2 */
137 Biobuf* finput;         /* input file */
138 Biobuf* foutput;        /* y.output file */
139
140         /* communication variables between various I/O routines */
141
142 char*   infile;                 /* input file name */
143 int     numbval;                /* value of an input number */
144 char    tokname[NAMESIZE+UTFmax+1];     /* input token name, slop for runes and 0 */
145
146         /* structure declarations */
147
148 typedef
149 struct
150 {
151         int     lset[TBITSET];
152 } Lkset;
153
154 typedef
155 struct
156 {
157         int*    pitem;
158         Lkset*  look;
159 } Item;
160
161 typedef
162 struct
163 {
164         char*   name;
165         int     value;
166 } Symb;
167
168 typedef
169 struct
170 {
171         int*    pitem;
172         int     flag;
173         Lkset   ws;
174 } Wset;
175
176         /* storage of names */
177
178 char    cnames[CNAMSZ];         /* place where token and nonterminal names are stored */
179 int     cnamsz = CNAMSZ;        /* size of cnames */
180 char*   cnamp = cnames;         /* place where next name is to be put in */
181 int     ndefout = 4;            /* number of defined symbols output */
182 char*   tempname;
183 char*   actname;
184 char    ttempname[] = TEMPNAME;
185 char    tactname[] = ACTNAME;
186 char*   parser = PARSER;
187 char*   yydebug;
188
189         /* storage of types */
190 int     ntypes;                 /* number of types defined */
191 char*   typeset[NTYPES];        /* pointers to type tags */
192
193         /* token information */
194
195 int     ntokens = 0 ;           /* number of tokens */
196 Symb    tokset[NTERMS];
197 int     toklev[NTERMS];         /* vector with the precedence of the terminals */
198
199         /* nonterminal information */
200
201 int     nnonter = -1;           /* the number of nonterminals */
202 Symb    nontrst[NNONTERM];
203 int     start;                  /* start symbol */
204
205         /* assigned token type values */
206 int     extval = 0;
207
208 char*   ytabc = OFILE;  /* name of y.tab.c */
209
210         /* grammar rule information */
211
212 int     mem0[MEMSIZE] ;         /* production storage */
213 int*    mem = mem0;
214 int     nprod = 1;              /* number of productions */
215 int*    prdptr[NPROD];          /* pointers to descriptions of productions */
216 int     levprd[NPROD];          /* precedence levels for the productions */
217 int     rlines[NPROD];          /* line number for this rule */
218
219         /* state information */
220
221 int     nstate = 0;             /* number of states */
222 Item*   pstate[NSTATES+2];      /* pointers to the descriptions of the states */
223 int     tystate[NSTATES];       /* contains type information about the states */
224 int     defact[NSTATES];        /* the default actions of states */
225 int     tstates[NTERMS];        /* states generated by terminal gotos */
226 int     ntstates[NNONTERM];     /* states generated by nonterminal gotos */
227 int     mstates[NSTATES];       /* chain of overflows of term/nonterm generation lists  */
228 int     lastred;                /* the number of the last reduction of a state */
229
230         /* lookahead set information */
231
232 Lkset   lkst[LSETSIZE];
233 int     nolook;                 /* flag to turn off lookahead computations */
234 int     tbitset;                /* size of lookahead sets */
235 int     nlset = 0;              /* next lookahead set index */
236 int     nolook = 0;             /* flag to suppress lookahead computations */
237 Lkset   clset;                  /* temporary storage for lookahead computations */
238
239         /* working set information */
240
241 Wset    wsets[WSETSIZE];
242 Wset*   cwp;
243
244         /* storage for action table */
245
246 int     amem[ACTSIZE];          /* action table storage */
247 int*    memp = amem;            /* next free action table position */
248 int     indgo[NSTATES];         /* index to the stored goto table */
249
250         /* temporary vector, indexable by states, terms, or ntokens */
251
252 int     temp1[TEMPSIZE];        /* temporary storage, indexed by terms + ntokens or states */
253 int     lineno = 1;             /* current input line number */
254 int     fatfl = 1;              /* if on, error is fatal */
255 int     nerrors = 0;            /* number of errors */
256
257         /* statistics collection variables */
258
259 int     zzgoent;
260 int     zzgobest;
261 int     zzacent;
262 int     zzexcp;
263 int     zzclose;
264 int     zzrrconf;
265 int     zzsrconf;
266
267 int*    ggreed = lkst[0].lset;
268 int*    pgo = wsets[0].ws.lset;
269 int*    yypgo = &nontrst[0].value;
270
271 int     maxspr = 0;             /* maximum spread of any entry */
272 int     maxoff = 0;             /* maximum offset into a array */
273 int*    pmem = mem0;
274 int*    maxa;
275 int     nxdb = 0;
276 int     adb = 0;
277
278
279         /* storage for information about the nonterminals */
280
281 int**   pres[NNONTERM+2];       /* vector of pointers to productions yielding each nonterminal */
282 Lkset*  pfirst[NNONTERM+2];     /* vector of pointers to first sets for each nonterminal */
283 int     pempty[NNONTERM+1];     /* vector of nonterminals nontrivially deriving e */
284
285         /* random stuff picked out from between functions */
286
287 int     indebug = 0;
288 Wset*   zzcwp = wsets;
289 int     zzgoent = 0;
290 int     zzgobest = 0;
291 int     zzacent = 0;
292 int     zzexcp = 0;
293 int     zzclose = 0;
294 int     zzsrconf = 0;
295 int*    zzmemsz = mem0;
296 int     zzrrconf = 0;
297 int     pidebug = 0;            /* debugging flag for putitem */
298 int     gsdebug = 0;
299 int     cldebug = 0;            /* debugging flag for closure */
300 int     pkdebug = 0;
301 int     g2debug = 0;
302
303 struct
304 {
305         char*   name;
306         long    value;
307 } resrv[] =
308 {
309         "binary",       BINARY,
310         "left",         LEFT,
311         "nonassoc",     BINARY,
312         "prec",         PREC,
313         "right",        RIGHT,
314         "start",        START,
315         "term",         TERM,
316         "token",        TERM,
317         "type",         TYPEDEF,
318         "union",        UNION,
319         0,
320 };
321
322         /* define functions */
323
324 void    main(int, char**);
325 void    others(void);
326 char*   chcopy(char*, char*);
327 char*   writem(int*);
328 char*   symnam(int);
329 void    summary(void);
330 void    error(char*, ...);
331 void    aryfil(int*, int, int);
332 int     setunion(int*, int*);
333 void    prlook(Lkset*);
334 void    cpres(void);
335 void    cpfir(void);
336 int     state(int);
337 void    putitem(int*, Lkset*);
338 void    cempty(void);
339 void    stagen(void);
340 void    closure(int);
341 Lkset*  flset(Lkset*);
342 void    cleantmp(void);
343 void    intr(void);
344 void    setup(int, char**);
345 void    finact(void);
346 int     defin(int, char*);
347 void    defout(int);
348 char*   cstash(char*);
349 long    gettok(void);
350 int     fdtype(int);
351 int     chfind(int, char*);
352 void    cpyunion(void);
353 void    cpycode(void);
354 int     skipcom(void);
355 void    cpyact(int);
356 void    openup(char*, int, int, int, char*);
357 void    output(void);
358 int     apack(int*, int);
359 void    go2out(void);
360 void    go2gen(int);
361 void    precftn(int, int, int);
362 void    wract(int);
363 void    wrstate(int);
364 void    warray(char*, int*, int);
365 void    hideprod(void);
366 void    callopt(void);
367 void    gin(int);
368 void    stin(int);
369 int     nxti(void);
370 void    osummary(void);
371 void    aoutput(void);
372 void    arout(char*, int*, int);
373 int     gtnm(void);
374
375 void
376 main(int argc, char *argv[])
377 {
378
379         setup(argc, argv);      /* initialize and read productions */
380         tbitset = NWORDS(ntokens);
381         cpres();                /* make table of which productions yield a given nonterminal */
382         cempty();               /* make a table of which nonterminals can match the empty string */
383         cpfir();                /* make a table of firsts of nonterminals */
384         stagen();               /* generate the states */
385         output();               /* write the states and the tables */
386         go2out();
387         hideprod();
388         summary();
389         callopt();
390         others();
391         exits(0);
392 }
393
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
398 others(void)
399 {
400         int c, i, j;
401
402         finput = Bopen(parser, OREAD);
403         if(finput == 0)
404                 error("cannot find parser %s", parser);
405         Blethal(finput, nil);
406         warray("yyr1", levprd, nprod);
407         aryfil(temp1, nprod, 0);
408         PLOOP(1, i)
409                 temp1[i] = prdptr[i+1]-prdptr[i]-2;
410         warray("yyr2", temp1, nprod);
411
412         aryfil(temp1, nstate, -1000);
413         TLOOP(i)
414                 for(j=tstates[i]; j!=0; j=mstates[j])
415                         temp1[j] = i;
416         NTLOOP(i)
417                 for(j=ntstates[i]; j!=0; j=mstates[j])
418                         temp1[j] = -i;
419         warray("yychk", temp1, nstate);
420         warray("yydef", defact, nstate);
421
422         /* put out token translation tables */
423         /* table 1 has 0-256 */
424         aryfil(temp1, 256, 0);
425         c = 0;
426         TLOOP(i) {
427                 j = tokset[i].value;
428                 if(j >= 0 && j < 256) {
429                         if(temp1[j]) {
430                                 print("yacc bug -- cant have 2 different Ts with same value\n");
431                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
432                                 nerrors++;
433                         }
434                         temp1[j] = i;
435                         if(j > c)
436                                 c = j;
437                 }
438         }
439         warray("yytok1", temp1, c+1);
440
441         /* table 2 has PRIVATE-PRIVATE+256 */
442         aryfil(temp1, 256, 0);
443         c = 0;
444         TLOOP(i) {
445                 j = tokset[i].value - PRIVATE;
446                 if(j >= 0 && j < 256) {
447                         if(temp1[j]) {
448                                 print("yacc bug -- cant have 2 different Ts with same value\n");
449                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
450                                 nerrors++;
451                         }
452                         temp1[j] = i;
453                         if(j > c)
454                                 c = j;
455                 }
456         }
457         warray("yytok2", temp1, c+1);
458
459         /* table 3 has everything else */
460         Bprint(ftable, "long    yytok3[] =\n{\n");
461         c = 0;
462         TLOOP(i) {
463                 j = tokset[i].value;
464                 if(j >= 0 && j < 256)
465                         continue;
466                 if(j >= PRIVATE && j < 256+PRIVATE)
467                         continue;
468
469                 Bprint(ftable, "%4d,%4d,", j, i);
470                 c++;
471                 if(c%5 == 0)
472                         Bprint(ftable, "\n");
473         }
474         Bprint(ftable, "%4d\n};\n", 0);
475
476         /* copy parser text */
477         Bprint(ftable, "\n#line\t1\t\"%s\"\n", parser);
478         while((c=Bgetrune(finput)) != Beof) {
479                 if(c == '$') {
480                         if((c = Bgetrune(finput)) != 'A')
481                                 Bputrune(ftable, '$');
482                         else { /* copy actions */
483                                 faction = Bopen(actname, OREAD);
484                                 if(faction == 0)
485                                         error("cannot reopen action tempfile");
486                                 Blethal(faction, nil);
487                                 while((c=Bgetrune(faction)) != Beof)
488                                         Bputrune(ftable, c);
489                                 Bterm(faction);
490                                 ZAPFILE(actname);
491                                 c = Bgetrune(finput);
492                         }
493                 }
494                 Bputrune(ftable, c);
495         }
496         Bterm(ftable);
497 }
498
499 /*
500  * copies string q into p, returning next free char ptr
501  */
502 char*
503 chcopy(char* p, char* q)
504 {
505         int c;
506
507         while(c = *q) {
508                 if(c == '"')
509                         *p++ = '\\';
510                 *p++ = c;
511                 q++;
512         }
513         *p = 0;
514         return p;
515 }
516
517 /*
518  * creates output string for item pointed to by pp
519  */
520 char*
521 writem(int *pp)
522 {
523         int i,*p;
524         static char sarr[ISIZE];
525         char* q;
526
527         for(p=pp; *p>0; p++)
528                 ;
529         p = prdptr[-*p];
530         q = chcopy(sarr, nontrst[*p-NTBASE].name);
531         q = chcopy(q, ": ");
532         for(;;) {
533                 *q = ' ';
534                 p++;
535                 if(p == pp)
536                         *q = '.';
537                 q++;
538                 *q = '\0';
539                 i = *p;
540                 if(i <= 0)
541                         break;
542                 q = chcopy(q, symnam(i));
543                 if(q > &sarr[ISIZE-30])
544                         error("item too big");
545         }
546
547         /* an item calling for a reduction */
548         i = *pp;
549         if(i < 0 ) {
550                 q = chcopy(q, "    (");
551                 sprint(q, "%d)", -i);
552         }
553         return sarr;
554 }
555
556 /*
557  * return a pointer to the name of symbol i
558  */
559 char*
560 symnam(int i)
561 {
562         char* cp;
563
564         cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
565         if(*cp == ' ')
566                 cp++;
567         return cp;
568 }
569
570 /*
571  * output the summary on y.output
572  */
573 void
574 summary(void)
575 {
576
577         if(foutput != 0) {
578                 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
579                         ntokens, NTERMS, nnonter, NNONTERM);
580                 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
581                         nprod, NPROD, nstate, NSTATES);
582                 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
583                         zzsrconf, zzrrconf);
584                 Bprint(foutput, "%d/%d working sets used\n",
585                         (int)(zzcwp-wsets), WSETSIZE);
586                 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
587                         (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
588                 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
589                 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
590                 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
591                 Bprint(foutput, "%d goto entries\n", zzgoent);
592                 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
593         }
594         if(zzsrconf != 0 || zzrrconf != 0) {
595                 print("\nconflicts: ");
596                 if(zzsrconf)
597                         print("%d shift/reduce", zzsrconf);
598                 if(zzsrconf && zzrrconf)
599                         print(", ");
600                 if(zzrrconf)
601                         print("%d reduce/reduce", zzrrconf);
602                 print("\n");
603         }
604         if(ftemp != 0) {
605                 Bterm(ftemp);
606                 ftemp = 0;
607         }
608         if(fdefine != 0) {
609                 Bterm(fdefine);
610                 fdefine = 0;
611         }
612 }
613
614 /*
615  * write out error comment -- NEEDS WORK
616  */
617 void
618 error(char *s, ...)
619 {
620
621         nerrors++;
622         fprint(2, "\n fatal error:");
623         fprint(2, s, (&s)[1]);
624         fprint(2, ", %s:%d\n", infile, lineno);
625         if(!fatfl)
626                 return;
627         summary();
628         cleantmp();
629         exits("error");
630 }
631
632 /*
633  * set elements 0 through n-1 to c
634  */
635 void
636 aryfil(int *v, int n, int c)
637 {
638         int i;
639
640         for(i=0; i<n; i++)
641                 v[i] = c;
642 }
643
644 /*
645  * set a to the union of a and b
646  * return 1 if b is not a subset of a, 0 otherwise
647  */
648 int
649 setunion(int *a, int *b)
650 {
651         int i, x, sub;
652
653         sub = 0;
654         SETLOOP(i) {
655                 x = *a;
656                 *a |= *b;
657                 if(*a != x)
658                         sub = 1;
659                 a++;
660                 b++;
661         }
662         return sub;
663 }
664
665 void
666 prlook(Lkset* p)
667 {
668         int j, *pp;
669
670         pp = p->lset;
671         if(pp == 0)
672                 Bprint(foutput, "\tNULL");
673         else {
674                 Bprint(foutput, " { ");
675                 TLOOP(j)
676                         if(BIT(pp,j))
677                                 Bprint(foutput, "%s ", symnam(j));
678                 Bprint(foutput, "}");
679         }
680 }
681
682 /*
683  * compute an array with the beginnings of  productions yielding given nonterminals
684  * The array pres points to these lists
685  * the array pyield has the lists: the total size is only NPROD+1
686  */
687 void
688 cpres(void)
689 {
690         int c, j, i, **pmem;
691         static int *pyield[NPROD];
692
693         pmem = pyield;
694         NTLOOP(i) {
695                 c = i+NTBASE;
696                 pres[i] = pmem;
697                 fatfl = 0;      /* make undefined  symbols  nonfatal */
698                 PLOOP(0, j)
699                         if(*prdptr[j] == c)
700                                 *pmem++ =  prdptr[j]+1;
701                 if(pres[i] == pmem)
702                         error("nonterminal %s not defined!", nontrst[i].name);
703         }
704         pres[i] = pmem;
705         fatfl = 1;
706         if(nerrors) {
707                 summary();
708                 cleantmp();
709                 exits("error");
710         }
711         if(pmem != &pyield[nprod])
712                 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
713 }
714
715 /*
716  * compute an array with the first of nonterminals
717  */
718 void
719 cpfir(void)
720 {
721         int *p, **s, i, **t, ch, changes;
722
723         zzcwp = &wsets[nnonter];
724         NTLOOP(i) {
725                 aryfil(wsets[i].ws.lset, tbitset, 0);
726                 t = pres[i+1];
727                 /* initially fill the sets */
728                 for(s=pres[i]; s<t; ++s)
729                         for(p = *s; (ch = *p) > 0; ++p) {
730                                 if(ch < NTBASE) {
731                                         SETBIT(wsets[i].ws.lset, ch);
732                                         break;
733                                 }
734                                 if(!pempty[ch-NTBASE])
735                                         break;
736                         }
737         }
738
739         /* now, reflect transitivity */
740         changes = 1;
741         while(changes) {
742                 changes = 0;
743                 NTLOOP(i) {
744                         t = pres[i+1];
745                         for(s = pres[i]; s < t; ++s)
746                                 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
747                                         changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
748                                         if(!pempty[ch])
749                                                 break;
750                                 }
751                 }
752         }
753
754         NTLOOP(i)
755                 pfirst[i] = flset(&wsets[i].ws);
756         if(!indebug)
757                 return;
758         if(foutput != 0)
759                 NTLOOP(i) {
760                         Bprint(foutput, "\n%s: ", nontrst[i].name);
761                         prlook(pfirst[i]);
762                         Bprint(foutput, " %d\n", pempty[i]);
763                 }
764 }
765
766 /*
767  * sorts last state,and sees if it equals earlier ones. returns state number
768  */
769 int
770 state(int c)
771 {
772         Item *p1, *p2, *k, *l, *q1, *q2;
773         int size1, size2, i;
774
775         p1 = pstate[nstate];
776         p2 = pstate[nstate+1];
777         if(p1 == p2)
778                 return 0;       /* null state */
779         /* sort the items */
780         for(k = p2-1; k > p1; k--)      /* make k the biggest */
781                 for(l = k-1; l >= p1; --l)
782                         if(l->pitem > k->pitem) {
783                                 int *s;
784                                 Lkset *ss;
785
786                                 s = k->pitem;
787                                 k->pitem = l->pitem;
788                                 l->pitem = s;
789                                 ss = k->look;
790                                 k->look = l->look;
791                                 l->look = ss;
792                         }
793         size1 = p2 - p1;        /* size of state */
794
795         for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
796                 /* get ith state */
797                 q1 = pstate[i];
798                 q2 = pstate[i+1];
799                 size2 = q2 - q1;
800                 if(size1 != size2)
801                         continue;
802                 k = p1;
803                 for(l = q1; l < q2; l++) {
804                         if(l->pitem != k->pitem)
805                                 break;
806                         k++;
807                 }
808                 if(l != q2)
809                         continue;
810                 /* found it */
811                 pstate[nstate+1] = pstate[nstate];      /* delete last state */
812                 /* fix up lookaheads */
813                 if(nolook)
814                         return i;
815                 for(l = q1, k = p1; l < q2; ++l, ++k ) {
816                         int s;
817
818                         SETLOOP(s)
819                                 clset.lset[s] = l->look->lset[s];
820                         if(setunion(clset.lset, k->look->lset)) {
821                                 tystate[i] = MUSTDO;
822                                 /* register the new set */
823                                 l->look = flset( &clset );
824                         }
825                 }
826                 return i;
827         }
828         /* state is new */
829         if(nolook)
830                 error("yacc state/nolook error");
831         pstate[nstate+2] = p2;
832         if(nstate+1 >= NSTATES)
833                 error("too many states");
834         if(c >= NTBASE) {
835                 mstates[nstate] = ntstates[c-NTBASE];
836                 ntstates[c-NTBASE] = nstate;
837         } else {
838                 mstates[nstate] = tstates[c];
839                 tstates[c] = nstate;
840         }
841         tystate[nstate] = MUSTDO;
842         return nstate++;
843 }
844
845 void
846 putitem(int *ptr, Lkset *lptr)
847 {
848         Item *j;
849
850         if(pidebug && foutput != 0)
851                 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
852         j = pstate[nstate+1];
853         j->pitem = ptr;
854         if(!nolook)
855                 j->look = flset(lptr);
856         pstate[nstate+1] = ++j;
857         if((int*)j > zzmemsz) {
858                 zzmemsz = (int*)j;
859                 if(zzmemsz >=  &mem0[MEMSIZE])
860                         error("out of state space");
861         }
862 }
863
864 /*
865  * mark nonterminals which derive the empty string
866  * also, look for nonterminals which don't derive any token strings
867  */
868 void
869 cempty(void)
870 {
871
872         int i, *p;
873
874         /* first, use the array pempty to detect productions that can never be reduced */
875         /* set pempty to WHONOWS */
876         aryfil(pempty, nnonter+1, WHOKNOWS);
877
878         /* now, look at productions, marking nonterminals which derive something */
879 more:
880         PLOOP(0, i) {
881                 if(pempty[*prdptr[i] - NTBASE])
882                         continue;
883                 for(p = prdptr[i]+1; *p >= 0; ++p)
884                         if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
885                                 break;
886                 /* production can be derived */
887                 if(*p < 0) {
888                         pempty[*prdptr[i]-NTBASE] = OK;
889                         goto more;
890                 }
891         }
892
893         /* now, look at the nonterminals, to see if they are all OK */
894         NTLOOP(i) {
895                 /* the added production rises or falls as the start symbol ... */
896                 if(i == 0)
897                         continue;
898                 if(pempty[i] != OK) {
899                         fatfl = 0;
900                         error("nonterminal %s never derives any token string", nontrst[i].name);
901                 }
902         }
903
904         if(nerrors) {
905                 summary();
906                 cleantmp();
907                 exits("error");
908         }
909
910         /* now, compute the pempty array, to see which nonterminals derive the empty string */
911         /* set pempty to WHOKNOWS */
912         aryfil( pempty, nnonter+1, WHOKNOWS);
913
914         /* loop as long as we keep finding empty nonterminals */
915
916 again:
917         PLOOP(1, i) {
918                 /* not known to be empty */
919                 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
920                         for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
921                                 ;
922                         /* we have a nontrivially empty nonterminal */
923                         if(*p < 0) {
924                                 pempty[*prdptr[i]-NTBASE] = EMPTY;
925                                 /* got one ... try for another */
926                                 goto again;
927                         }
928                 }
929         }
930 }
931
932 /*
933  * generate the states
934  */
935 void
936 stagen(void)
937 {
938
939         int c, i, j, more;
940         Wset *p, *q;
941
942         /* initialize */
943         nstate = 0;
944
945         /* THIS IS FUNNY from the standpoint of portability
946          * it represents the magic moment when the mem0 array, which has
947          * been holding the productions, starts to hold item pointers, of a
948          * different type...
949          * someday, alloc should be used to allocate all this stuff... for now, we
950          * accept that if pointers don't fit in integers, there is a problem...
951          */
952
953         pstate[0] = pstate[1] = (Item*)mem;
954         aryfil(clset.lset, tbitset, 0);
955         putitem(prdptr[0]+1, &clset);
956         tystate[0] = MUSTDO;
957         nstate = 1;
958         pstate[2] = pstate[1];
959
960         aryfil(amem, ACTSIZE, 0);
961
962         /* now, the main state generation loop */
963         for(more=1; more;) {
964                 more = 0;
965                 SLOOP(i) {
966                         if(tystate[i] != MUSTDO)
967                                 continue;
968                         tystate[i] = DONE;
969                         aryfil(temp1, nnonter+1, 0);
970                         /* take state i, close it, and do gotos */
971                         closure(i);
972                         /* generate goto's */
973                         WSLOOP(wsets, p) {
974                                 if(p->flag)
975                                         continue;
976                                 p->flag = 1;
977                                 c = *(p->pitem);
978                                 if(c <= 1) {
979                                         if(pstate[i+1]-pstate[i] <= p-wsets)
980                                                 tystate[i] = MUSTLOOKAHEAD;
981                                         continue;
982                                 }
983                                 /* do a goto on c */
984                                 WSLOOP(p, q)
985                                         /* this item contributes to the goto */
986                                         if(c == *(q->pitem)) {
987                                                 putitem(q->pitem+1, &q->ws);
988                                                 q->flag = 1;
989                                         }
990                                 if(c < NTBASE)
991                                         state(c);       /* register new state */
992                                 else
993                                         temp1[c-NTBASE] = state(c);
994                         }
995                         if(gsdebug && foutput != 0) {
996                                 Bprint(foutput, "%d: ", i);
997                                 NTLOOP(j)
998                                         if(temp1[j])
999                                                 Bprint(foutput, "%s %d, ",
1000                                                 nontrst[j].name, temp1[j]);
1001                                 Bprint(foutput, "\n");
1002                         }
1003                         indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1004                         /* do some more */
1005                         more = 1;
1006                 }
1007         }
1008 }
1009
1010 /*
1011  * generate the closure of state i
1012  */
1013 void
1014 closure(int i)
1015 {
1016
1017         Wset *u, *v;
1018         Item *p, *q;
1019         int c, ch, work, k, *pi, **s, **t;
1020
1021         zzclose++;
1022
1023         /* first, copy kernel of state i to wsets */
1024         cwp = wsets;
1025         ITMLOOP(i, p, q) {
1026                 cwp->pitem = p->pitem;
1027                 cwp->flag = 1;                  /* this item must get closed */
1028                 SETLOOP(k)
1029                         cwp->ws.lset[k] = p->look->lset[k];
1030                 WSBUMP(cwp);
1031         }
1032
1033         /* now, go through the loop, closing each item */
1034         work = 1;
1035         while(work) {
1036                 work = 0;
1037                 WSLOOP(wsets, u) {
1038                         if(u->flag == 0)
1039                                 continue;
1040                         /* dot is before c */
1041                         c = *(u->pitem);
1042                         if(c < NTBASE) {
1043                                 u->flag = 0;
1044                                 /* only interesting case is where . is before nonterminal */
1045                                 continue;
1046                         }
1047
1048                         /* compute the lookahead */
1049                         aryfil(clset.lset, tbitset, 0);
1050
1051                         /* find items involving c */
1052                         WSLOOP(u, v)
1053                                 if(v->flag == 1 && *(pi=v->pitem) == c) {
1054                                         v->flag = 0;
1055                                         if(nolook)
1056                                                 continue;
1057                                         while((ch = *++pi) > 0) {
1058                                                 /* terminal symbol */
1059                                                 if(ch < NTBASE) {
1060                                                         SETBIT(clset.lset, ch);
1061                                                         break;
1062                                                 }
1063                                                 /* nonterminal symbol */
1064                                                 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1065                                                 if(!pempty[ch-NTBASE])
1066                                                         break;
1067                                         }
1068                                         if(ch <= 0)
1069                                                 setunion(clset.lset, v->ws.lset);
1070                                 }
1071
1072                         /*
1073                          * now loop over productions derived from c
1074                          * c is now nonterminal number
1075                          */
1076                         c -= NTBASE;
1077                         t = pres[c+1];
1078                         for(s = pres[c]; s < t; ++s) {
1079                                 /*
1080                                  * put these items into the closure
1081                                  * is the item there
1082                                  */
1083                                 WSLOOP(wsets, v)
1084                                         /* yes, it is there */
1085                                         if(v->pitem == *s) {
1086                                                 if(nolook)
1087                                                         goto nexts;
1088                                                 if(setunion(v->ws.lset, clset.lset))
1089                                                         v->flag = work = 1;
1090                                                 goto nexts;
1091                                         }
1092
1093                                 /*  not there; make a new entry */
1094                                 if(cwp-wsets+1 >= WSETSIZE)
1095                                         error( "working set overflow");
1096                                 cwp->pitem = *s;
1097                                 cwp->flag = 1;
1098                                 if(!nolook) {
1099                                         work = 1;
1100                                         SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1101                                 }
1102                                 WSBUMP(cwp);
1103
1104                         nexts:;
1105                         }
1106                 }
1107         }
1108
1109         /* have computed closure; flags are reset; return */
1110         if(cwp > zzcwp)
1111                 zzcwp = cwp;
1112         if(cldebug && foutput != 0) {
1113                 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1114                 WSLOOP(wsets, u) {
1115                         if(u->flag)
1116                                 Bprint(foutput, "flag set!\n");
1117                         u->flag = 0;
1118                         Bprint(foutput, "\t%s", writem(u->pitem));
1119                         prlook(&u->ws);
1120                         Bprint(foutput, "\n");
1121                 }
1122         }
1123 }
1124
1125 /*
1126  * decide if the lookahead set pointed to by p is known
1127  * return pointer to a perminent location for the set
1128  */
1129 Lkset*
1130 flset(Lkset *p)
1131 {
1132         Lkset *q;
1133         int *u, *v, *w, j;
1134
1135         for(q = &lkst[nlset]; q-- > lkst;) {
1136                 u = p->lset;
1137                 v = q->lset;
1138                 w = &v[tbitset];
1139                 while(v < w)
1140                         if(*u++ != *v++)
1141                                 goto more;
1142                 /* we have matched */
1143                 return q;
1144         more:;
1145         }
1146         /* add a new one */
1147         q = &lkst[nlset++];
1148         if(nlset >= LSETSIZE)
1149                 error("too many lookahead sets");
1150         SETLOOP(j)
1151                 q->lset[j] = p->lset[j];
1152         return q;
1153 }
1154
1155 void
1156 cleantmp(void)
1157 {
1158         ZAPFILE(actname);
1159         ZAPFILE(tempname);
1160 }
1161
1162 void
1163 intr(void)
1164 {
1165         cleantmp();
1166         exits("interrupted");
1167 }
1168
1169 void
1170 usage(void)
1171 {
1172         fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1173         exits("usage");
1174 }
1175
1176 void
1177 setup(int argc, char *argv[])
1178 {
1179         long c, t;
1180         int i, j, lev, ty, ytab, *p;
1181         int vflag, dflag, stem;
1182         char actnm[8], *stemc, *s, dirbuf[128];
1183
1184         ytab = 0;
1185         vflag = 0;
1186         dflag = 0;
1187         stem = 0;
1188         stemc = "y";
1189         foutput = 0;
1190         fdefine = 0;
1191         fdebug = 0;
1192         ARGBEGIN{
1193         case 'v':
1194         case 'V':
1195                 vflag++;
1196                 break;
1197         case 'D':
1198                 yydebug = EARGF(usage());
1199                 break;
1200         case 'd':
1201                 dflag++;
1202                 break;
1203         case 'o':
1204                 ytab++;
1205                 ytabc = EARGF(usage());
1206                 break;
1207         case 's':
1208                 stem++;
1209                 stemc = ARGF();
1210                 break;
1211         case 'S':
1212                 parser = PARSERS;
1213                 break;
1214         default:
1215                 error("illegal option: %c", ARGC());
1216         }ARGEND
1217         openup(stemc, dflag, vflag, ytab, ytabc);
1218
1219         ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1220         faction = Bopen(actname = mktemp(tactname), OWRITE);
1221         if(ftemp == 0 || faction == 0)
1222                 error("cannot open temp file");
1223         Blethal(ftemp, nil);
1224         Blethal(faction, nil);
1225         if(argc < 1)
1226                 error("no input file");
1227         infile = argv[0];
1228         if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1229                 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1230                 s = malloc(i);
1231                 if(s != nil){
1232                         snprint(s, i, "%s/%s", dirbuf, infile);
1233                         cleanname(s);
1234                         infile = s;
1235                 }
1236         }
1237         finput = Bopen(infile, OREAD);
1238         if(finput == 0)
1239                 error("cannot open '%s'", argv[0]);
1240         Blethal(finput, nil);
1241         cnamp = cnames;
1242
1243         defin(0, "$end");
1244         extval = PRIVATE;       /* tokens start in unicode 'private use' */
1245         defin(0, "error");
1246         defin(1, "$accept");
1247         defin(0, "$unk");
1248         mem = mem0;
1249         i = 0;
1250
1251         for(t = gettok(); t != MARK && t != ENDFILE;)
1252         switch(t) {
1253         case ';':
1254                 t = gettok();
1255                 break;
1256
1257         case START:
1258                 if(gettok() != IDENTIFIER)
1259                         error("bad %%start construction");
1260                 start = chfind(1, tokname);
1261                 t = gettok();
1262                 continue;
1263
1264         case TYPEDEF:
1265                 if(gettok() != TYPENAME)
1266                         error("bad syntax in %%type");
1267                 ty = numbval;
1268                 for(;;) {
1269                         t = gettok();
1270                         switch(t) {
1271                         case IDENTIFIER:
1272                                 if((t=chfind(1, tokname)) < NTBASE) {
1273                                         j = TYPE(toklev[t]);
1274                                         if(j != 0 && j != ty)
1275                                                 error("type redeclaration of token %s",
1276                                                         tokset[t].name);
1277                                         else
1278                                                 SETTYPE(toklev[t], ty);
1279                                 } else {
1280                                         j = nontrst[t-NTBASE].value;
1281                                         if(j != 0 && j != ty)
1282                                                 error("type redeclaration of nonterminal %s",
1283                                                         nontrst[t-NTBASE].name );
1284                                         else
1285                                                 nontrst[t-NTBASE].value = ty;
1286                                 }
1287                         case ',':
1288                                 continue;
1289                         case ';':
1290                                 t = gettok();
1291                         default:
1292                                 break;
1293                         }
1294                         break;
1295                 }
1296                 continue;
1297
1298         case UNION:
1299                 /* copy the union declaration to the output */
1300                 cpyunion();
1301                 t = gettok();
1302                 continue;
1303
1304         case LEFT:
1305         case BINARY:
1306         case RIGHT:
1307                 i++;
1308
1309         case TERM:
1310                 /* nonzero means new prec. and assoc. */
1311                 lev = t-TERM;
1312                 ty = 0;
1313
1314                 /* get identifiers so defined */
1315                 t = gettok();
1316
1317                 /* there is a type defined */
1318                 if(t == TYPENAME) {
1319                         ty = numbval;
1320                         t = gettok();
1321                 }
1322                 for(;;) {
1323                         switch(t) {
1324                         case ',':
1325                                 t = gettok();
1326                                 continue;
1327
1328                         case ';':
1329                                 break;
1330
1331                         case IDENTIFIER:
1332                                 j = chfind(0, tokname);
1333                                 if(j >= NTBASE)
1334                                         error("%s defined earlier as nonterminal", tokname);
1335                                 if(lev) {
1336                                         if(ASSOC(toklev[j]))
1337                                                 error("redeclaration of precedence of %s", tokname);
1338                                         SETASC(toklev[j], lev);
1339                                         SETPLEV(toklev[j], i);
1340                                 }
1341                                 if(ty) {
1342                                         if(TYPE(toklev[j]))
1343                                                 error("redeclaration of type of %s", tokname);
1344                                         SETTYPE(toklev[j],ty);
1345                                 }
1346                                 t = gettok();
1347                                 if(t == NUMBER) {
1348                                         tokset[j].value = numbval;
1349                                         if(j < ndefout && j > 3)
1350                                                 error("please define type number of %s earlier",
1351                                                         tokset[j].name);
1352                                         t = gettok();
1353                                 }
1354                                 continue;
1355                         }
1356                         break;
1357                 }
1358                 continue;
1359
1360         case LCURLY:
1361                 defout(0);
1362                 cpycode();
1363                 t = gettok();
1364                 continue;
1365
1366         default:
1367                 error("syntax error");
1368         }
1369         if(t == ENDFILE)
1370                 error("unexpected EOF before %%");
1371
1372         /* t is MARK */
1373         Bprint(ftable, "extern  int     yyerrflag;\n");
1374         Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1375         Bprint(ftable, "#define YYMAXDEPTH      150\n");
1376         Bprint(ftable, "#endif\n" );
1377         if(!ntypes) {
1378                 Bprint(ftable, "#ifndef YYSTYPE\n");
1379                 Bprint(ftable, "#define YYSTYPE int\n");
1380                 Bprint(ftable, "#endif\n");
1381         }
1382         Bprint(ftable, "YYSTYPE yylval;\n");
1383         Bprint(ftable, "YYSTYPE yyval;\n");
1384
1385         prdptr[0] = mem;
1386
1387         /* added production */
1388         *mem++ = NTBASE;
1389
1390         /* if start is 0, we will overwrite with the lhs of the first rule */
1391         *mem++ = start;
1392         *mem++ = 1;
1393         *mem++ = 0;
1394         prdptr[1] = mem;
1395         while((t=gettok()) == LCURLY)
1396                 cpycode();
1397         if(t != IDENTCOLON)
1398                 error("bad syntax on first rule");
1399
1400         if(!start)
1401                 prdptr[0][1] = chfind(1, tokname);
1402
1403         /* read rules */
1404         while(t != MARK && t != ENDFILE) {
1405                 /* process a rule */
1406                 rlines[nprod] = lineno;
1407                 if(t == '|')
1408                         *mem++ = *prdptr[nprod-1];
1409                 else
1410                         if(t == IDENTCOLON) {
1411                                 *mem = chfind(1, tokname);
1412                                 if(*mem < NTBASE)
1413                                         error("token illegal on LHS of grammar rule");
1414                                 mem++;
1415                         } else
1416                                 error("illegal rule: missing semicolon or | ?");
1417                 /* read rule body */
1418                 t = gettok();
1419
1420         more_rule:
1421                 while(t == IDENTIFIER) {
1422                         *mem = chfind(1, tokname);
1423                         if(*mem < NTBASE)
1424                                 levprd[nprod] = toklev[*mem];
1425                         mem++;
1426                         t = gettok();
1427                 }
1428                 if(t == PREC) {
1429                         if(gettok() != IDENTIFIER)
1430                                 error("illegal %%prec syntax");
1431                         j = chfind(2, tokname);
1432                         if(j >= NTBASE)
1433                                 error("nonterminal %s illegal after %%prec",
1434                                         nontrst[j-NTBASE].name);
1435                         levprd[nprod] = toklev[j];
1436                         t = gettok();
1437                 }
1438                 if(t == '=') {
1439                         levprd[nprod] |= ACTFLAG;
1440                         Bprint(faction, "\ncase %d:", nprod);
1441                         cpyact(mem-prdptr[nprod]-1);
1442                         Bprint(faction, " break;");
1443                         if((t=gettok()) == IDENTIFIER) {
1444
1445                                 /* action within rule... */
1446                                 sprint(actnm, "$$%d", nprod);
1447
1448                                 /* make it a nonterminal */
1449                                 j = chfind(1, actnm);
1450
1451                                 /*
1452                                  * the current rule will become rule number nprod+1
1453                                  * move the contents down, and make room for the null
1454                                  */
1455                                 for(p = mem; p >= prdptr[nprod]; --p)
1456                                         p[2] = *p;
1457                                 mem += 2;
1458
1459                                 /* enter null production for action */
1460                                 p = prdptr[nprod];
1461                                 *p++ = j;
1462                                 *p++ = -nprod;
1463
1464                                 /* update the production information */
1465                                 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1466                                 levprd[nprod] = ACTFLAG;
1467                                 if(++nprod >= NPROD)
1468                                         error("more than %d rules", NPROD);
1469                                 prdptr[nprod] = p;
1470
1471                                 /* make the action appear in the original rule */
1472                                 *mem++ = j;
1473
1474                                 /* get some more of the rule */
1475                                 goto more_rule;
1476                         }
1477                 }
1478
1479                 while(t == ';')
1480                         t = gettok();
1481                 *mem++ = -nprod;
1482
1483                 /* check that default action is reasonable */
1484                 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1485
1486                         /* no explicit action, LHS has value */
1487                         int tempty;
1488
1489                         tempty = prdptr[nprod][1];
1490                         if(tempty < 0)
1491                                 error("must return a value, since LHS has a type");
1492                         else
1493                                 if(tempty >= NTBASE)
1494                                         tempty = nontrst[tempty-NTBASE].value;
1495                                 else
1496                                         tempty = TYPE(toklev[tempty]);
1497                         if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1498                                 error("default action causes potential type clash");
1499                 }
1500                 nprod++;
1501                 if(nprod >= NPROD)
1502                         error("more than %d rules", NPROD);
1503                 prdptr[nprod] = mem;
1504                 levprd[nprod] = 0;
1505         }
1506
1507         /* end of all rules */
1508         defout(1);
1509
1510         finact();
1511         if(t == MARK) {
1512                 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1513                 while((c=Bgetrune(finput)) != Beof)
1514                         Bputrune(ftable, c);
1515         }
1516         Bterm(finput);
1517 }
1518
1519 /*
1520  * finish action routine
1521  */
1522 void
1523 finact(void)
1524 {
1525
1526         Bterm(faction);
1527         Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1528         Bprint(ftable, "#define YYERRCODE %d\n", 2);
1529 }
1530
1531 /*
1532  * define s to be a terminal if t=0
1533  * or a nonterminal if t=1
1534  */
1535 int
1536 defin(int nt, char *s)
1537 {
1538         int val;
1539         Rune rune;
1540
1541         val = 0;
1542         if(nt) {
1543                 nnonter++;
1544                 if(nnonter >= NNONTERM)
1545                         error("too many nonterminals, limit %d",NNONTERM);
1546                 nontrst[nnonter].name = cstash(s);
1547                 return NTBASE + nnonter;
1548         }
1549
1550         /* must be a token */
1551         ntokens++;
1552         if(ntokens >= NTERMS)
1553                 error("too many terminals, limit %d", NTERMS);
1554         tokset[ntokens].name = cstash(s);
1555
1556         /* establish value for token */
1557         /* single character literal */
1558         if(s[0] == ' ') {
1559                 val = chartorune(&rune, &s[1]);
1560                 if(s[val+1] == 0) {
1561                         val = rune;
1562                         goto out;
1563                 }
1564         }
1565
1566         /* escape sequence */
1567         if(s[0] == ' ' && s[1] == '\\') {
1568                 if(s[3] == 0) {
1569                         /* single character escape sequence */
1570                         switch(s[2]) {
1571                         case 'n':       val = '\n'; break;
1572                         case 'r':       val = '\r'; break;
1573                         case 'b':       val = '\b'; break;
1574                         case 't':       val = '\t'; break;
1575                         case 'f':       val = '\f'; break;
1576                         case '\'':      val = '\''; break;
1577                         case '"':       val = '"'; break;
1578                         case '\\':      val = '\\'; break;
1579                         default:        error("invalid escape");
1580                         }
1581                         goto out;
1582                 }
1583
1584                 /* \nnn sequence */
1585                 if(s[2] >= '0' && s[2] <= '7') {
1586                         if(s[3] < '0' ||
1587                            s[3] > '7' ||
1588                            s[4] < '0' ||
1589                            s[4] > '7' ||
1590                            s[5] != 0)
1591                                 error("illegal \\nnn construction");
1592                         val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1593                         if(val == 0)
1594                                 error("'\\000' is illegal");
1595                         goto out;
1596                 }
1597                 error("unknown escape");
1598         }
1599         val = extval++;
1600
1601 out:
1602         tokset[ntokens].value = val;
1603         toklev[ntokens] = 0;
1604         return ntokens;
1605 }
1606
1607 /*
1608  * write out the defines (at the end of the declaration section)
1609  */
1610 void
1611 defout(int last)
1612 {
1613         int i, c;
1614         char sar[NAMESIZE+10];
1615
1616         for(i=ndefout; i<=ntokens; i++) {
1617                 /* non-literals */
1618                 c = tokset[i].name[0];
1619                 if(c != ' ' && c != '$') {
1620                         Bprint(ftable, "#define %s      %d\n",
1621                                 tokset[i].name, tokset[i].value);
1622                         if(fdefine)
1623                                 Bprint(fdefine, "#define\t%s\t%d\n",
1624                                         tokset[i].name, tokset[i].value);
1625                 }
1626         }
1627         ndefout = ntokens+1;
1628         if(last && fdebug) {
1629                 Bprint(fdebug, "char*   yytoknames[] =\n{\n");
1630                 TLOOP(i) {
1631                         if(tokset[i].name) {
1632                                 chcopy(sar, tokset[i].name);
1633                                 Bprint(fdebug, "\t\"%s\",\n", sar);
1634                                 continue;
1635                         }
1636                         Bprint(fdebug, "\t0,\n");
1637                 }
1638                 Bprint(fdebug, "};\n");
1639         }
1640 }
1641
1642 char*
1643 cstash(char *s)
1644 {
1645         char *temp;
1646
1647         temp = cnamp;
1648         do {
1649                 if(cnamp >= &cnames[cnamsz])
1650                         error("too many characters in id's and literals");
1651                 else
1652                         *cnamp++ = *s;
1653         } while(*s++);
1654         return temp;
1655 }
1656
1657 long
1658 gettok(void)
1659 {
1660         long c;
1661         Rune rune;
1662         int i, base, match, reserve;
1663         static int peekline;
1664
1665 begin:
1666         reserve = 0;
1667         lineno += peekline;
1668         peekline = 0;
1669         c = Bgetrune(finput);
1670         while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1671                 if(c == '\n')
1672                         lineno++;
1673                 c = Bgetrune(finput);
1674         }
1675
1676         /* skip comment */
1677         if(c == '/') {
1678                 lineno += skipcom();
1679                 goto begin;
1680         }
1681         switch(c) {
1682         case Beof:
1683                 return ENDFILE;
1684
1685         case '{':
1686                 Bungetrune(finput);
1687                 return '=';
1688
1689         case '<':
1690                 /* get, and look up, a type name (union member name) */
1691                 i = 0;
1692                 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1693                         rune = c;
1694                         c = runetochar(&tokname[i], &rune);
1695                         if(i < NAMESIZE)
1696                                 i += c;
1697                 }
1698                 if(c != '>')
1699                         error("unterminated < ... > clause");
1700                 tokname[i] = 0;
1701                 for(i=1; i<=ntypes; i++)
1702                         if(!strcmp(typeset[i], tokname)) {
1703                                 numbval = i;
1704                                 return TYPENAME;
1705                         }
1706                 ntypes++;
1707                 numbval = ntypes;
1708                 typeset[numbval] = cstash(tokname);
1709                 return TYPENAME;
1710
1711         case '"':
1712         case '\'':
1713                 match = c;
1714                 tokname[0] = ' ';
1715                 i = 1;
1716                 for(;;) {
1717                         c = Bgetrune(finput);
1718                         if(c == '\n' || c <= 0)
1719                                 error("illegal or missing ' or \"" );
1720                         if(c == '\\') {
1721                                 tokname[i] = '\\';
1722                                 if(i < NAMESIZE)
1723                                         i++;
1724                                 c = Bgetrune(finput);
1725                         } else
1726                                 if(c == match)
1727                                         break;
1728                         rune = c;
1729                         c = runetochar(&tokname[i], &rune);
1730                         if(i < NAMESIZE)
1731                                 i += c;
1732                 }
1733                 break;
1734
1735         case '%':
1736         case '\\':
1737                 switch(c = Bgetrune(finput)) {
1738                 case '0':       return TERM;
1739                 case '<':       return LEFT;
1740                 case '2':       return BINARY;
1741                 case '>':       return RIGHT;
1742                 case '%':
1743                 case '\\':      return MARK;
1744                 case '=':       return PREC;
1745                 case '{':       return LCURLY;
1746                 default:        reserve = 1;
1747                 }
1748
1749         default:
1750                 /* number */
1751                 if(isdigit(c)) {
1752                         numbval = c-'0';
1753                         base = (c=='0')? 8: 10;
1754                         for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1755                                 numbval = numbval*base + (c-'0');
1756                         Bungetrune(finput);
1757                         return NUMBER;
1758                 }
1759                 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1760                         i = 0;
1761                         while(islower(c) || isupper(c) || isdigit(c) ||
1762                             c == '-' || c=='_' || c=='.' || c=='$') {
1763                                 if(reserve && isupper(c))
1764                                         c += 'a'-'A';
1765                                 rune = c;
1766                                 c = runetochar(&tokname[i], &rune);
1767                                 if(i < NAMESIZE)
1768                                         i += c;
1769                                 c = Bgetrune(finput);
1770                         }
1771                 } else
1772                         return c;
1773                 Bungetrune(finput);
1774         }
1775         tokname[i] = 0;
1776
1777         /* find a reserved word */
1778         if(reserve) {
1779                 for(c=0; resrv[c].name; c++)
1780                         if(strcmp(tokname, resrv[c].name) == 0)
1781                                 return resrv[c].value;
1782                 error("invalid escape, or illegal reserved word: %s", tokname);
1783         }
1784
1785         /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1786         c = Bgetrune(finput);
1787         while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1788                 if(c == '\n')
1789                         peekline++;
1790                 /* look for comments */
1791                 if(c == '/')
1792                         peekline += skipcom();
1793                 c = Bgetrune(finput);
1794         }
1795         if(c == ':')
1796                 return IDENTCOLON;
1797         Bungetrune(finput);
1798         return IDENTIFIER;
1799 }
1800
1801 /*
1802  * determine the type of a symbol
1803  */
1804 int
1805 fdtype(int t)
1806 {
1807         int v;
1808
1809         if(t >= NTBASE)
1810                 v = nontrst[t-NTBASE].value;
1811         else
1812                 v = TYPE(toklev[t]);
1813         if(v <= 0)
1814                 error("must specify type for %s", (t>=NTBASE)?
1815                         nontrst[t-NTBASE].name: tokset[t].name);
1816         return v;
1817 }
1818
1819 int
1820 chfind(int t, char *s)
1821 {
1822         int i;
1823
1824         if(s[0] == ' ')
1825                 t = 0;
1826         TLOOP(i)
1827                 if(!strcmp(s, tokset[i].name))
1828                         return i;
1829         NTLOOP(i)
1830                 if(!strcmp(s, nontrst[i].name))
1831                         return NTBASE+i;
1832
1833         /* cannot find name */
1834         if(t > 1)
1835                 error("%s should have been defined earlier", s);
1836         return defin(t, s);
1837 }
1838
1839 /*
1840  * copy the union declaration to the output, and the define file if present
1841  */
1842 void
1843 cpyunion(void)
1844 {
1845         long c;
1846         int level;
1847
1848         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1849         Bprint(ftable, "typedef union ");
1850         if(fdefine != 0)
1851                 Bprint(fdefine, "\ntypedef union ");
1852
1853         level = 0;
1854         for(;;) {
1855                 if((c=Bgetrune(finput)) == Beof)
1856                         error("EOF encountered while processing %%union");
1857                 Bputrune(ftable, c);
1858                 if(fdefine != 0)
1859                         Bputrune(fdefine, c);
1860                 switch(c) {
1861                 case '\n':
1862                         lineno++;
1863                         break;
1864                 case '{':
1865                         level++;
1866                         break;
1867                 case '}':
1868                         level--;
1869
1870                         /* we are finished copying */
1871                         if(level == 0) {
1872                                 Bprint(ftable, " YYSTYPE;\n");
1873                                 if(fdefine != 0)
1874                                         Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1875                                 return;
1876                         }
1877                 }
1878         }
1879 }
1880
1881 /*
1882  * copies code between \{ and \}
1883  */
1884 void
1885 cpycode(void)
1886 {
1887
1888         long c;
1889
1890         c = Bgetrune(finput);
1891         if(c == '\n') {
1892                 c = Bgetrune(finput);
1893                 lineno++;
1894         }
1895         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1896         while(c != Beof) {
1897                 if(c == '\\') {
1898                         if((c=Bgetrune(finput)) == '}')
1899                                 return;
1900                         Bputc(ftable, '\\');
1901                 }
1902                 if(c == '%') {
1903                         if((c=Bgetrune(finput)) == '}')
1904                                 return;
1905                         Bputc(ftable, '%');
1906                 }
1907                 Bputrune(ftable, c);
1908                 if(c == '\n')
1909                         lineno++;
1910                 c = Bgetrune(finput);
1911         }
1912         error("eof before %%}");
1913 }
1914
1915 /*
1916  * skip over comments
1917  * skipcom is called after reading a '/'
1918  */
1919 int
1920 skipcom(void)
1921 {
1922         long c;
1923         int i;
1924
1925         /* i is the number of lines skipped */
1926         i = 0;
1927         c = Bgetrune(finput);
1928         if(c == '/'){                   /* C++ //: skip to end of line */
1929                 while((c = Bgetrune(finput)) != Beof)
1930                         if(c == '\n')
1931                                 return 1;
1932         }else if(c == '*'){             /* normal C comment */
1933                 while((c = Bgetrune(finput)) != Beof) {
1934                         while(c == '*')
1935                                 if((c = Bgetrune(finput)) == '/')
1936                                         return i;
1937                         if(c == '\n')
1938                                 i++;
1939                 }
1940         }else
1941                 error("illegal comment");
1942
1943         error("EOF inside comment");
1944         return 0;
1945 }
1946
1947 /*
1948  * copy C action to the next ; or closing }
1949  */
1950 void
1951 cpyact(int offset)
1952 {
1953         long c;
1954         int brac, match, j, s, fnd, tok;
1955
1956         Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1957         brac = 0;
1958
1959 loop:
1960         c = Bgetrune(finput);
1961 swt:
1962         switch(c) {
1963         case ';':
1964                 if(brac == 0) {
1965                         Bputrune(faction, c);
1966                         return;
1967                 }
1968                 goto lcopy;
1969
1970         case '{':
1971                 brac++;
1972                 goto lcopy;
1973
1974         case '$':
1975                 s = 1;
1976                 tok = -1;
1977                 c = Bgetrune(finput);
1978
1979                 /* type description */
1980                 if(c == '<') {
1981                         Bungetrune(finput);
1982                         if(gettok() != TYPENAME)
1983                                 error("bad syntax on $<ident> clause");
1984                         tok = numbval;
1985                         c = Bgetrune(finput);
1986                 }
1987                 if(c == '$') {
1988                         Bprint(faction, "yyval");
1989
1990                         /* put out the proper tag... */
1991                         if(ntypes) {
1992                                 if(tok < 0)
1993                                         tok = fdtype(*prdptr[nprod]);
1994                                 Bprint(faction, ".%s", typeset[tok]);
1995                         }
1996                         goto loop;
1997                 }
1998                 if(c == '-') {
1999                         s = -s;
2000                         c = Bgetrune(finput);
2001                 }
2002                 if(isdigit(c)) {
2003                         j = 0;
2004                         while(isdigit(c)) {
2005                                 j = j*10 + (c-'0');
2006                                 c = Bgetrune(finput);
2007                         }
2008                         Bungetrune(finput);
2009                         j = j*s - offset;
2010                         if(j > 0)
2011                                 error("Illegal use of $%d", j+offset);
2012
2013                 dollar:
2014                         Bprint(faction, "yypt[-%d].yyv", -j);
2015
2016                         /* put out the proper tag */
2017                         if(ntypes) {
2018                                 if(j+offset <= 0 && tok < 0)
2019                                         error("must specify type of $%d", j+offset);
2020                                 if(tok < 0)
2021                                         tok = fdtype(prdptr[nprod][j+offset]);
2022                                 Bprint(faction, ".%s", typeset[tok]);
2023                         }
2024                         goto loop;
2025                 }
2026                 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2027                         int tok; /* tok used oustide for type info */
2028
2029                         /* look for $name */
2030                         Bungetrune(finput);
2031                         if(gettok() != IDENTIFIER)
2032                                 error("$ must be followed by an identifier");
2033                         tok = chfind(2, tokname);
2034                         if((c = Bgetrune(finput)) != '#') {
2035                                 Bungetrune(finput);
2036                                 fnd = -1;
2037                         } else
2038                                 if(gettok() != NUMBER) {
2039                                         error("# must be followed by number");
2040                                         fnd = -1;
2041                                 } else
2042                                         fnd = numbval;
2043                         for(j=1; j<=offset; ++j)
2044                                 if(tok == prdptr[nprod][j]) {
2045                                         if(--fnd <= 0) {
2046                                                 j -= offset;
2047                                                 goto dollar;
2048                                         }
2049                                 }
2050                         error("$name or $name#number not found");
2051                 }
2052                 Bputc(faction, '$');
2053                 if(s < 0 )
2054                         Bputc(faction, '-');
2055                 goto swt;
2056
2057         case '}':
2058                 brac--;
2059                 if(brac)
2060                         goto lcopy;
2061                 Bputrune(faction, c);
2062                 return;
2063
2064         case '/':
2065                 /* look for comments */
2066                 Bputrune(faction, c);
2067                 c = Bgetrune(finput);
2068                 if(c != '*')
2069                         goto swt;
2070
2071                 /* it really is a comment */
2072                 Bputrune(faction, c);
2073                 c = Bgetrune(finput);
2074                 while(c >= 0) {
2075                         while(c == '*') {
2076                                 Bputrune(faction, c);
2077                                 if((c=Bgetrune(finput)) == '/')
2078                                         goto lcopy;
2079                         }
2080                         Bputrune(faction, c);
2081                         if(c == '\n')
2082                                 lineno++;
2083                         c = Bgetrune(finput);
2084                 }
2085                 error("EOF inside comment");
2086
2087         case '\'':
2088                 /* character constant */
2089                 match = '\'';
2090                 goto string;
2091
2092         case '"':
2093                 /* character string */
2094                 match = '"';
2095
2096         string:
2097                 Bputrune(faction, c);
2098                 while(c = Bgetrune(finput)) {
2099                         if(c == '\\') {
2100                                 Bputrune(faction, c);
2101                                 c = Bgetrune(finput);
2102                                 if(c == '\n')
2103                                         lineno++;
2104                         } else {
2105                                 if(c == match)
2106                                         goto lcopy;
2107                                 if(c == '\n')
2108                                         error("newline in string or char. const.");
2109                         }
2110                         Bputrune(faction, c);
2111                 }
2112                 error("EOF in string or character constant");
2113
2114         case Beof:
2115                 error("action does not terminate");
2116
2117         case '\n':
2118                 lineno++;
2119                 goto lcopy;
2120         }
2121
2122 lcopy:
2123         Bputrune(faction, c);
2124         goto loop;
2125 }
2126
2127 void
2128 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2129 {
2130         char buf[256];
2131
2132         if(vflag) {
2133                 snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2134                 foutput = Bopen(buf, OWRITE);
2135                 if(foutput == 0)
2136                         error("cannot open %s", buf);
2137                 Blethal(foutput, nil);
2138         }
2139         if(yydebug) {
2140                 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2141                 if((fdebug = Bopen(buf, OWRITE)) == 0)
2142                         error("can't open %s", buf);
2143         }
2144         if(dflag) {
2145                 snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2146                 fdefine = Bopen(buf, OWRITE);
2147                 if(fdefine == 0)
2148                         error("can't create %s", buf);
2149                 Blethal(fdefine, nil);
2150         }
2151         if(ytab == 0)
2152                 snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2153         else
2154                 strecpy(buf, buf+sizeof buf, ytabc);
2155         ftable = Bopen(buf, OWRITE);
2156         if(ftable == 0)
2157                 error("cannot open table file %s", buf);
2158         Blethal(ftable, nil);
2159 }
2160
2161 /*
2162  * print the output for the states
2163  */
2164 void
2165 output(void)
2166 {
2167         int i, k, c;
2168         Wset *u, *v;
2169
2170         Bprint(ftable, "short   yyexca[] =\n{");
2171         if(fdebug)
2172                 Bprint(fdebug, "char*   yystates[] =\n{\n");
2173
2174         /* output the stuff for state i */
2175         SLOOP(i) {
2176                 nolook = tystate[i]!=MUSTLOOKAHEAD;
2177                 closure(i);
2178
2179                 /* output actions */
2180                 nolook = 1;
2181                 aryfil(temp1, ntokens+nnonter+1, 0);
2182                 WSLOOP(wsets, u) {
2183                         c = *(u->pitem);
2184                         if(c > 1 && c < NTBASE && temp1[c] == 0) {
2185                                 WSLOOP(u, v)
2186                                         if(c == *(v->pitem))
2187                                                 putitem(v->pitem+1, (Lkset*)0);
2188                                 temp1[c] = state(c);
2189                         } else
2190                                 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2191                                         temp1[c+ntokens] = amem[indgo[i]+c];
2192                 }
2193                 if(i == 1)
2194                         temp1[1] = ACCEPTCODE;
2195
2196                 /* now, we have the shifts; look at the reductions */
2197                 lastred = 0;
2198                 WSLOOP(wsets, u) {
2199                         c = *u->pitem;
2200
2201                         /* reduction */
2202                         if(c <= 0) {
2203                                 lastred = -c;
2204                                 TLOOP(k)
2205                                         if(BIT(u->ws.lset, k)) {
2206                                                 if(temp1[k] == 0)
2207                                                         temp1[k] = c;
2208                                                 else
2209                                                 if(temp1[k] < 0) { /* reduce/reduce conflict */
2210                                                         if(foutput)
2211                                                                 Bprint(foutput,
2212                                                                         "\n%d: reduce/reduce conflict"
2213                                                                         " (red'ns %d and %d ) on %s",
2214                                                                         i, -temp1[k], lastred,
2215                                                                         symnam(k));
2216                                                         if(-temp1[k] > lastred)
2217                                                                 temp1[k] = -lastred;
2218                                                         zzrrconf++;
2219                                                 } else
2220                                                         /* potential shift/reduce conflict */
2221                                                         precftn( lastred, k, i );
2222                                         }
2223                                 }
2224                 }
2225                 wract(i);
2226         }
2227
2228         if(fdebug)
2229                 Bprint(fdebug, "};\n");
2230         Bprint(ftable, "};\n");
2231         Bprint(ftable, "#define YYNPROD %d\n", nprod);
2232         Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2233         if(yydebug)
2234                 Bprint(ftable, "#define yydebug %s\n", yydebug);
2235 }
2236
2237 /*
2238  * pack state i from temp1 into amem
2239  */
2240 int
2241 apack(int *p, int n)
2242 {
2243         int *pp, *qq, *rr, off, *q, *r;
2244
2245         /* we don't need to worry about checking because
2246          * we will only look at entries known to be there...
2247          * eliminate leading and trailing 0's
2248          */
2249
2250         q = p+n;
2251         for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2252                 ;
2253         /* no actions */
2254         if(pp > q)
2255                 return 0;
2256         p = pp;
2257
2258         /* now, find a place for the elements from p to q, inclusive */
2259         r = &amem[ACTSIZE-1];
2260         for(rr = amem; rr <= r; rr++, off++) {
2261                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2262                         if(*pp != 0)
2263                                 if(*pp != *qq && *qq != 0)
2264                                         goto nextk;
2265
2266                 /* we have found an acceptable k */
2267                 if(pkdebug && foutput != 0)
2268                         Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2269                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2270                         if(*pp) {
2271                                 if(qq > r)
2272                                         error("action table overflow");
2273                                 if(qq > memp)
2274                                         memp = qq;
2275                                 *qq = *pp;
2276                         }
2277                 if(pkdebug && foutput != 0)
2278                         for(pp = amem; pp <= memp; pp += 10) {
2279                                 Bprint(foutput, "\t");
2280                                 for(qq = pp; qq <= pp+9; qq++)
2281                                         Bprint(foutput, "%d ", *qq);
2282                                 Bprint(foutput, "\n");
2283                         }
2284                 return(off);
2285         nextk:;
2286         }
2287         error("no space in action table");
2288         return 0;
2289 }
2290
2291 /*
2292  * output the gotos for the nontermninals
2293  */
2294 void
2295 go2out(void)
2296 {
2297         int i, j, k, best, count, cbest, times;
2298
2299         /* mark begining of gotos */
2300         Bprint(ftemp, "$\n");
2301         for(i = 1; i <= nnonter; i++) {
2302                 go2gen(i);
2303
2304                 /* find the best one to make default */
2305                 best = -1;
2306                 times = 0;
2307
2308                 /* is j the most frequent */
2309                 for(j = 0; j <= nstate; j++) {
2310                         if(tystate[j] == 0)
2311                                 continue;
2312                         if(tystate[j] == best)
2313                                 continue;
2314
2315                         /* is tystate[j] the most frequent */
2316                         count = 0;
2317                         cbest = tystate[j];
2318                         for(k = j; k <= nstate; k++)
2319                                 if(tystate[k] == cbest)
2320                                         count++;
2321                         if(count > times) {
2322                                 best = cbest;
2323                                 times = count;
2324                         }
2325                 }
2326
2327                 /* best is now the default entry */
2328                 zzgobest += times-1;
2329                 for(j = 0; j <= nstate; j++)
2330                         if(tystate[j] != 0 && tystate[j] != best) {
2331                                 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2332                                 zzgoent++;
2333                         }
2334
2335                 /* now, the default */
2336                 if(best == -1)
2337                         best = 0;
2338                 zzgoent++;
2339                 Bprint(ftemp, "%d\n", best);
2340         }
2341 }
2342
2343 /*
2344  * output the gotos for nonterminal c
2345  */
2346 void
2347 go2gen(int c)
2348 {
2349         int i, work, cc;
2350         Item *p, *q;
2351
2352
2353         /* first, find nonterminals with gotos on c */
2354         aryfil(temp1, nnonter+1, 0);
2355         temp1[c] = 1;
2356         work = 1;
2357         while(work) {
2358                 work = 0;
2359                 PLOOP(0, i)
2360
2361                         /* cc is a nonterminal */
2362                         if((cc=prdptr[i][1]-NTBASE) >= 0)
2363                                 /* cc has a goto on c */
2364                                 if(temp1[cc] != 0) {
2365
2366                                         /* thus, the left side of production i does too */
2367                                         cc = *prdptr[i]-NTBASE;
2368                                         if(temp1[cc] == 0) {
2369                                                   work = 1;
2370                                                   temp1[cc] = 1;
2371                                         }
2372                                 }
2373         }
2374
2375         /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2376         if(g2debug && foutput != 0) {
2377                 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2378                 NTLOOP(i)
2379                         if(temp1[i])
2380                                 Bprint(foutput, "%s ", nontrst[i].name);
2381                 Bprint(foutput, "\n");
2382         }
2383
2384         /* now, go through and put gotos into tystate */
2385         aryfil(tystate, nstate, 0);
2386         SLOOP(i)
2387                 ITMLOOP(i, p, q)
2388                         if((cc = *p->pitem) >= NTBASE)
2389                                 /* goto on c is possible */
2390                                 if(temp1[cc-NTBASE]) {
2391                                         tystate[i] = amem[indgo[i]+c];
2392                                         break;
2393                                 }
2394 }
2395
2396 /*
2397  * decide a shift/reduce conflict by precedence.
2398  * r is a rule number, t a token number
2399  * the conflict is in state s
2400  * temp1[t] is changed to reflect the action
2401  */
2402 void
2403 precftn(int r, int t, int s)
2404 {
2405         int lp, lt, action;
2406
2407         lp = levprd[r];
2408         lt = toklev[t];
2409         if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2410
2411                 /* conflict */
2412                 if(foutput != 0)
2413                         Bprint(foutput,
2414                                 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2415                                 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2416                 zzsrconf++;
2417                 return;
2418         }
2419         if(PLEVEL(lt) == PLEVEL(lp))
2420                 action = ASSOC(lt);
2421         else
2422                 if(PLEVEL(lt) > PLEVEL(lp))
2423                         action = RASC;  /* shift */
2424                 else
2425                         action = LASC;  /* reduce */
2426         switch(action) {
2427         case BASC:  /* error action */
2428                 temp1[t] = ERRCODE;
2429                 break;
2430         case LASC:  /* reduce */
2431                 temp1[t] = -r;
2432                 break;
2433         }
2434 }
2435
2436 /*
2437  * output state i
2438  * temp1 has the actions, lastred the default
2439  */
2440 void
2441 wract(int i)
2442 {
2443         int p, p0, p1, ntimes, tred, count, j, flag;
2444
2445         /* find the best choice for lastred */
2446         lastred = 0;
2447         ntimes = 0;
2448         TLOOP(j) {
2449                 if(temp1[j] >= 0)
2450                         continue;
2451                 if(temp1[j]+lastred == 0)
2452                         continue;
2453                 /* count the number of appearances of temp1[j] */
2454                 count = 0;
2455                 tred = -temp1[j];
2456                 levprd[tred] |= REDFLAG;
2457                 TLOOP(p)
2458                         if(temp1[p]+tred == 0)
2459                                 count++;
2460                 if(count > ntimes) {
2461                         lastred = tred;
2462                         ntimes = count;
2463                 }
2464         }
2465
2466         /*
2467          * for error recovery, arrange that, if there is a shift on the
2468          * error recovery token, `error', that the default be the error action
2469          */
2470         if(temp1[2] > 0)
2471                 lastred = 0;
2472
2473         /* clear out entries in temp1 which equal lastred */
2474         TLOOP(p)
2475                 if(temp1[p]+lastred == 0)
2476                         temp1[p] = 0;
2477
2478         wrstate(i);
2479         defact[i] = lastred;
2480         flag = 0;
2481         TLOOP(p0)
2482                 if((p1=temp1[p0]) != 0) {
2483                         if(p1 < 0) {
2484                                 p1 = -p1;
2485                                 goto exc;
2486                         }
2487                         if(p1 == ACCEPTCODE) {
2488                                 p1 = -1;
2489                                 goto exc;
2490                         }
2491                         if(p1 == ERRCODE) {
2492                                 p1 = 0;
2493                         exc:
2494                                 if(flag++ == 0)
2495                                         Bprint(ftable, "-1, %d,\n", i);
2496                                 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2497                                 zzexcp++;
2498                                 continue;
2499                         }
2500                         Bprint(ftemp, "%d,%d,", p0, p1);
2501                         zzacent++;
2502                 }
2503         if(flag) {
2504                 defact[i] = -2;
2505                 Bprint(ftable, "\t-2, %d,\n", lastred);
2506         }
2507         Bprint(ftemp, "\n");
2508 }
2509
2510 /*
2511  * writes state i
2512  */
2513 void
2514 wrstate(int i)
2515 {
2516         int j0, j1;
2517         Item *pp, *qq;
2518         Wset *u;
2519
2520         if(fdebug) {
2521                 if(lastred) {
2522                         Bprint(fdebug, "        0, /*%d*/\n", i);
2523                 } else {
2524                         Bprint(fdebug, "        \"");
2525                         ITMLOOP(i, pp, qq)
2526                                 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2527                         if(tystate[i] == MUSTLOOKAHEAD)
2528                                 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2529                                         if(*u->pitem < 0)
2530                                                 Bprint(fdebug, "%s\\n", writem(u->pitem));
2531                         Bprint(fdebug, "\", /*%d*/\n", i);
2532                 }
2533         }
2534         if(foutput == 0)
2535                 return;
2536         Bprint(foutput, "\nstate %d\n", i);
2537         ITMLOOP(i, pp, qq)
2538                 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2539         if(tystate[i] == MUSTLOOKAHEAD)
2540                 /* print out empty productions in closure */
2541                 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2542                         if(*u->pitem < 0)
2543                                 Bprint(foutput, "\t%s\n", writem(u->pitem));
2544
2545         /* check for state equal to another */
2546         TLOOP(j0)
2547                 if((j1=temp1[j0]) != 0) {
2548                         Bprint(foutput, "\n\t%s  ", symnam(j0));
2549                         /* shift, error, or accept */
2550                         if(j1 > 0) {
2551                                 if(j1 == ACCEPTCODE)
2552                                         Bprint(foutput,  "accept");
2553                                 else
2554                                         if(j1 == ERRCODE)
2555                                                 Bprint(foutput, "error");
2556                                         else
2557                                                 Bprint(foutput, "shift %d", j1);
2558                         } else
2559                                 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2560                 }
2561
2562         /* output the final production */
2563         if(lastred)
2564                 Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2565                         lastred, rlines[lastred]);
2566         else
2567                 Bprint(foutput, "\n\t.  error\n\n");
2568
2569         /* now, output nonterminal actions */
2570         j1 = ntokens;
2571         for(j0 = 1; j0 <= nnonter; j0++) {
2572                 j1++;
2573                 if(temp1[j1])
2574                         Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2575         }
2576 }
2577
2578 void
2579 warray(char *s, int *v, int n)
2580 {
2581         int i;
2582
2583         Bprint(ftable, "short   %s[] =\n{", s);
2584         for(i=0;;) {
2585                 if(i%10 == 0)
2586                         Bprint(ftable, "\n");
2587                 Bprint(ftable, "%4d", v[i]);
2588                 i++;
2589                 if(i >= n) {
2590                         Bprint(ftable, "\n};\n");
2591                         break;
2592                 }
2593                 Bprint(ftable, ",");
2594         }
2595 }
2596
2597 /*
2598  * in order to free up the mem and amem arrays for the optimizer,
2599  * and still be able to output yyr1, etc., after the sizes of
2600  * the action array is known, we hide the nonterminals
2601  * derived by productions in levprd.
2602  */
2603
2604 void
2605 hideprod(void)
2606 {
2607         int i, j;
2608
2609         j = 0;
2610         levprd[0] = 0;
2611         PLOOP(1, i) {
2612                 if(!(levprd[i] & REDFLAG)) {
2613                         j++;
2614                         if(foutput != 0)
2615                                 Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2616                 }
2617                 levprd[i] = *prdptr[i] - NTBASE;
2618         }
2619         if(j)
2620                 print("%d rules never reduced\n", j);
2621 }
2622
2623 void
2624 callopt(void)
2625 {
2626         int i, *p, j, k, *q;
2627
2628         /* read the arrays from tempfile and set parameters */
2629         finput = Bopen(tempname, OREAD);
2630         if(finput == 0)
2631                 error("optimizer cannot open tempfile");
2632         Blethal(finput, nil);
2633
2634         pgo[0] = 0;
2635         temp1[0] = 0;
2636         nstate = 0;
2637         nnonter = 0;
2638         for(;;) {
2639                 switch(gtnm()) {
2640                 case '\n':
2641                         nstate++;
2642                         pmem--;
2643                         temp1[nstate] = pmem - mem0;
2644                 case ',':
2645                         continue;
2646                 case '$':
2647                         break;
2648                 default:
2649                         error("bad tempfile");
2650                 }
2651                 break;
2652         }
2653
2654         pmem--;
2655         temp1[nstate] = yypgo[0] = pmem - mem0;
2656         for(;;) {
2657                 switch(gtnm()) {
2658                 case '\n':
2659                         nnonter++;
2660                         yypgo[nnonter] = pmem-mem0;
2661                 case ',':
2662                         continue;
2663                 case -1:
2664                         break;
2665                 default:
2666                         error("bad tempfile");
2667                 }
2668                 break;
2669         }
2670         pmem--;
2671         yypgo[nnonter--] = pmem - mem0;
2672         for(i = 0; i < nstate; i++) {
2673                 k = 32000;
2674                 j = 0;
2675                 q = mem0 + temp1[i+1];
2676                 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2677                         if(*p > j)
2678                                 j = *p;
2679                         if(*p < k)
2680                                 k = *p;
2681                 }
2682                 /* nontrivial situation */
2683                 if(k <= j) {
2684                         /* j is now the range */
2685 /*                      j -= k;                 /* call scj */
2686                         if(k > maxoff)
2687                                 maxoff = k;
2688                 }
2689                 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2690                 if(j > maxspr)
2691                         maxspr = j;
2692         }
2693
2694         /* initialize ggreed table */
2695         for(i = 1; i <= nnonter; i++) {
2696                 ggreed[i] = 1;
2697                 j = 0;
2698
2699                 /* minimum entry index is always 0 */
2700                 q = mem0 + yypgo[i+1] - 1;
2701                 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2702                         ggreed[i] += 2;
2703                         if(*p > j)
2704                                 j = *p;
2705                 }
2706                 ggreed[i] = ggreed[i] + 2*j;
2707                 if(j > maxoff)
2708                         maxoff = j;
2709         }
2710
2711         /* now, prepare to put the shift actions into the amem array */
2712         for(i = 0; i < ACTSIZE; i++)
2713                 amem[i] = 0;
2714         maxa = amem;
2715         for(i = 0; i < nstate; i++) {
2716                 if(tystate[i] == 0 && adb > 1)
2717                         Bprint(ftable, "State %d: null\n", i);
2718                 indgo[i] = YYFLAG1;
2719         }
2720         while((i = nxti()) != NOMORE)
2721                 if(i >= 0)
2722                         stin(i);
2723                 else
2724                         gin(-i);
2725
2726         /* print amem array */
2727         if(adb > 2 )
2728                 for(p = amem; p <= maxa; p += 10) {
2729                         Bprint(ftable, "%4d  ", (int)(p-amem));
2730                         for(i = 0; i < 10; ++i)
2731                                 Bprint(ftable, "%4d  ", p[i]);
2732                         Bprint(ftable, "\n");
2733                 }
2734
2735         /* write out the output appropriate to the language */
2736         aoutput();
2737         osummary();
2738         ZAPFILE(tempname);
2739 }
2740
2741 void
2742 gin(int i)
2743 {
2744         int *p, *r, *s, *q1, *q2;
2745
2746         /* enter gotos on nonterminal i into array amem */
2747         ggreed[i] = 0;
2748
2749         q2 = mem0+ yypgo[i+1] - 1;
2750         q1 = mem0 + yypgo[i];
2751
2752         /* now, find amem place for it */
2753         for(p = amem; p < &amem[ACTSIZE]; p++) {
2754                 if(*p)
2755                         continue;
2756                 for(r = q1; r < q2; r += 2) {
2757                         s = p + *r + 1;
2758                         if(*s)
2759                                 goto nextgp;
2760                         if(s > maxa)
2761                                 if((maxa = s) > &amem[ACTSIZE])
2762                                         error("a array overflow");
2763                 }
2764                 /* we have found amem spot */
2765                 *p = *q2;
2766                 if(p > maxa)
2767                         if((maxa = p) > &amem[ACTSIZE])
2768                                 error("a array overflow");
2769                 for(r = q1; r < q2; r += 2) {
2770                         s = p + *r + 1;
2771                         *s = r[1];
2772                 }
2773                 pgo[i] = p-amem;
2774                 if(adb > 1)
2775                         Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2776                 return;
2777
2778         nextgp:;
2779         }
2780         error("cannot place goto %d\n", i);
2781 }
2782
2783 void
2784 stin(int i)
2785 {
2786         int *r, *s, n, flag, j, *q1, *q2;
2787
2788         tystate[i] = 0;
2789
2790         /* enter state i into the amem array */
2791         q2 = mem0+temp1[i+1];
2792         q1 = mem0+temp1[i];
2793         /* find an acceptable place */
2794         for(n = -maxoff; n < ACTSIZE; n++) {
2795                 flag = 0;
2796                 for(r = q1; r < q2; r += 2) {
2797                         if((s = *r + n + amem) < amem)
2798                                 goto nextn;
2799                         if(*s == 0)
2800                                 flag++;
2801                         else
2802                                 if(*s != r[1])
2803                                         goto nextn;
2804                 }
2805
2806                 /* check that the position equals another only if the states are identical */
2807                 for(j=0; j<nstate; j++) {
2808                         if(indgo[j] == n) {
2809
2810                                 /* we have some disagreement */
2811                                 if(flag)
2812                                         goto nextn;
2813                                 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2814
2815                                         /* states are equal */
2816                                         indgo[i] = n;
2817                                         if(adb > 1)
2818                                                 Bprint(ftable,
2819                                                 "State %d: entry at %d equals state %d\n",
2820                                                 i, n, j);
2821                                         return;
2822                                 }
2823
2824                                 /* we have some disagreement */
2825                                 goto nextn;
2826                         }
2827                 }
2828
2829                 for(r = q1; r < q2; r += 2) {
2830                         if((s = *r+n+amem) >= &amem[ACTSIZE])
2831                                 error("out of space in optimizer a array");
2832                         if(s > maxa)
2833                                 maxa = s;
2834                         if(*s != 0 && *s != r[1])
2835                                 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2836                         *s = r[1];
2837                 }
2838                 indgo[i] = n;
2839                 if(adb > 1)
2840                         Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2841                 return;
2842         nextn:;
2843         }
2844         error("Error; failure to place state %d\n", i);
2845 }
2846
2847 /*
2848  * finds the next i
2849  */
2850 int
2851 nxti(void)
2852 {
2853         int i, max, maxi;
2854
2855         max = 0;
2856         maxi = 0;
2857         for(i = 1; i <= nnonter; i++)
2858                 if(ggreed[i] >= max) {
2859                         max = ggreed[i];
2860                         maxi = -i;
2861                 }
2862         for(i = 0; i < nstate; ++i)
2863                 if(tystate[i] >= max) {
2864                         max = tystate[i];
2865                         maxi = i;
2866                 }
2867         if(nxdb)
2868                 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2869         if(max == 0)
2870                 return NOMORE;
2871         return maxi;
2872 }
2873
2874 /*
2875  * write summary
2876  */
2877 void
2878 osummary(void)
2879 {
2880
2881         int i, *p;
2882
2883         if(foutput == 0)
2884                 return;
2885         i = 0;
2886         for(p = maxa; p >= amem; p--)
2887                 if(*p == 0)
2888                         i++;
2889
2890         Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2891                 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2892         Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2893         Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2894 }
2895
2896 /*
2897  * this version is for C
2898  * write out the optimized parser
2899  */
2900 void
2901 aoutput(void)
2902 {
2903         Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2904         arout("yyact", amem, (maxa-amem)+1);
2905         arout("yypact", indgo, nstate);
2906         arout("yypgo", pgo, nnonter+1);
2907 }
2908
2909 void
2910 arout(char *s, int *v, int n)
2911 {
2912         int i;
2913
2914         Bprint(ftable, "short   %s[] =\n{", s);
2915         for(i = 0; i < n;) {
2916                 if(i%10 == 0)
2917                         Bprint(ftable, "\n");
2918                 Bprint(ftable, "%4d", v[i]);
2919                 i++;
2920                 if(i == n)
2921                         Bprint(ftable, "\n};\n");
2922                 else
2923                         Bprint(ftable, ",");
2924         }
2925 }
2926
2927 /*
2928  * read and convert an integer from the standard input
2929  * return the terminating character
2930  * blanks, tabs, and newlines are ignored
2931  */
2932 int
2933 gtnm(void)
2934 {
2935         int sign, val, c;
2936
2937         sign = 0;
2938         val = 0;
2939         while((c=Bgetrune(finput)) != Beof) {
2940                 if(isdigit(c)) {
2941                         val = val*10 + c-'0';
2942                         continue;
2943                 }
2944                 if(c == '-') {
2945                         sign = 1;
2946                         continue;
2947                 }
2948                 break;
2949         }
2950         if(sign)
2951                 val = -val;
2952         *pmem++ = val;
2953         if(pmem >= &mem0[MEMSIZE])
2954                 error("out of space");
2955         return c;
2956 }