]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/yacc.c
upas/marshal: add -S saveto to save outgoing mail, fix -F
[plan9front.git] / sys / src / cmd / yacc.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5
6 #define Bungetrune      Bungetc         /* ok for now. */
7
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET         ((32+NTERMS)/32)        /* BOTCH?? +31 */
12 #define BIT(a,i)        ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)     ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)       (((n)+32)/32)
15
16 #define PARSER          "/sys/lib/yaccpar"
17 #define PARSERS         "/sys/lib/yaccpars"
18 #define TEMPNAME        "y.tmp.XXXXXX"
19 #define ACTNAME         "y.acts.XXXXXX"
20 #define OFILE           "tab.c"
21 #define FILEU           "output"
22 #define FILED           "tab.h"
23 #define FILEDEBUG       "debug"
24
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31         ACTSIZE         = 40000,
32         MEMSIZE         = 40000,
33         NSTATES         = 2000,
34         NTERMS          = 511,
35         NPROD           = 1600,
36         NNONTERM        = 600,
37         TEMPSIZE        = 2000,
38         CNAMSZ          = 10000,
39         LSETSIZE        = 2400,
40         WSETSIZE        = 350,
41
42         NAMESIZE        = 50,
43         NTYPES          = 63,
44         ISIZE           = 400,
45
46         PRIVATE         = 0xE000,       /* unicode private use */
47
48         /* relationships which must hold:
49                 TBITSET ints must hold NTERMS+1 bits...
50                 WSETSIZE >= NNONTERM
51                 LSETSIZE >= NNONTERM
52                 TEMPSIZE >= NTERMS + NNONTERM + 1
53                 TEMPSIZE >= NSTATES
54         */
55
56         NTBASE          = 010000,
57         ERRCODE         = 8190,
58         ACCEPTCODE      = 8191,
59
60         NOASC           = 0,    /* no assoc. */
61         LASC            = 1,    /* left assoc. */
62         RASC            = 2,    /* right assoc. */
63         BASC            = 3,    /* binary assoc. */
64
65         /* flags for state generation */
66
67         DONE            = 0,
68         MUSTDO          = 1,
69         MUSTLOOKAHEAD   = 2,
70
71         /* flags for a rule having an action, and being reduced */
72
73         ACTFLAG         = 04,
74         REDFLAG         = 010,
75
76         /* output parser flags */
77         YYFLAG1         = -1000,
78
79         /* parse tokens */
80         IDENTIFIER      = PRIVATE,
81         MARK,
82         TERM,
83         LEFT,
84         RIGHT,
85         BINARY,
86         PREC,
87         LCURLY,
88         IDENTCOLON,
89         NUMBER,
90         START,
91         TYPEDEF,
92         TYPENAME,
93         UNION,
94
95         ENDFILE         = 0,
96
97         EMPTY           = 1,
98         WHOKNOWS        = 0,
99         OK              = 1,
100         NOMORE          = -1000,
101 };
102
103         /* macros for getting associativity and precedence levels */
104
105 #define ASSOC(i)        ((i)&03)
106 #define PLEVEL(i)       (((i)>>4)&077)
107 #define TYPE(i)         (((i)>>10)&077)
108
109         /* macros for setting associativity and precedence levels */
110
111 #define SETASC(i,j)     i |= j
112 #define SETPLEV(i,j)    i |= (j<<4)
113 #define SETTYPE(i,j)    i |= (j<<10)
114
115         /* looping macros */
116
117 #define TLOOP(i)        for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)       for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)      for(i=s; i<nprod; i++)
120 #define SLOOP(i)        for(i=0; i<nstate; i++)
121 #define WSBUMP(x)       x++
122 #define WSLOOP(s,j)     for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)  for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)      for(i=0; i<tbitset; i++)
125
126         /* command to clobber tempfiles after use */
127
128 #define ZAPFILE(bfd, x) {\
129                 if(bfd) Bterm(bfd); \
130                 if(x) remove(x); \
131         }
132
133         /* I/O descriptors */
134
135 Biobuf* faction;        /* file for saving actions */
136 Biobuf* fdefine;        /* file for #defines */
137 Biobuf* fdebug;         /* y.debug for strings for debugging */
138 Biobuf* ftable;         /* y.tab.c file */
139 Biobuf* ftemp;          /* tempfile to pass 2 */
140 Biobuf* finput;         /* input file */
141 Biobuf* foutput;        /* y.output file */
142
143         /* communication variables between various I/O routines */
144
145 char*   infile;                 /* input file name */
146 char    inpath[1024];           /* input full path */
147 int     numbval;                /* value of an input number */
148 char    tokname[NAMESIZE+UTFmax+1];     /* input token name, slop for runes and 0 */
149
150         /* structure declarations */
151
152 typedef
153 struct
154 {
155         int     lset[TBITSET];
156 } Lkset;
157
158 typedef
159 struct
160 {
161         int*    pitem;
162         Lkset*  look;
163 } Item;
164
165 typedef
166 struct
167 {
168         char*   name;
169         int     value;
170 } Symb;
171
172 typedef
173 struct
174 {
175         int*    pitem;
176         int     flag;
177         Lkset   ws;
178 } Wset;
179
180         /* storage of names */
181
182 char    cnames[CNAMSZ];         /* place where token and nonterminal names are stored */
183 int     cnamsz = CNAMSZ;        /* size of cnames */
184 char*   cnamp = cnames;         /* place where next name is to be put in */
185 int     ndefout = 4;            /* number of defined symbols output */
186 char*   tempname;
187 char*   actname;
188 char    ttempname[] = TEMPNAME;
189 char    tactname[] = ACTNAME;
190 char*   parser = PARSER;
191 char*   yydebug;
192
193         /* storage of types */
194 int     ntypes;                 /* number of types defined */
195 char*   typeset[NTYPES];        /* pointers to type tags */
196
197         /* token information */
198
199 int     ntokens = 0 ;           /* number of tokens */
200 Symb    tokset[NTERMS];
201 int     toklev[NTERMS];         /* vector with the precedence of the terminals */
202
203         /* nonterminal information */
204
205 int     nnonter = -1;           /* the number of nonterminals */
206 Symb    nontrst[NNONTERM];
207 int     start;                  /* start symbol */
208
209         /* assigned token type values */
210 int     extval = 0;
211
212 char*   ytabc = OFILE;  /* name of y.tab.c */
213
214         /* grammar rule information */
215
216 int     mem0[MEMSIZE] ;         /* production storage */
217 int*    mem = mem0;
218 int     nprod = 1;              /* number of productions */
219 int*    prdptr[NPROD];          /* pointers to descriptions of productions */
220 int     levprd[NPROD];          /* precedence levels for the productions */
221 int     rlines[NPROD];          /* line number for this rule */
222
223         /* state information */
224
225 int     nstate = 0;             /* number of states */
226 Item*   pstate[NSTATES+2];      /* pointers to the descriptions of the states */
227 int     tystate[NSTATES];       /* contains type information about the states */
228 int     defact[NSTATES];        /* the default actions of states */
229 int     tstates[NTERMS];        /* states generated by terminal gotos */
230 int     ntstates[NNONTERM];     /* states generated by nonterminal gotos */
231 int     mstates[NSTATES];       /* chain of overflows of term/nonterm generation lists  */
232 int     lastred;                /* the number of the last reduction of a state */
233
234         /* lookahead set information */
235
236 Lkset   lkst[LSETSIZE];
237 int     nolook;                 /* flag to turn off lookahead computations */
238 int     tbitset;                /* size of lookahead sets */
239 int     nlset = 0;              /* next lookahead set index */
240 int     nolook = 0;             /* flag to suppress lookahead computations */
241 Lkset   clset;                  /* temporary storage for lookahead computations */
242
243         /* working set information */
244
245 Wset    wsets[WSETSIZE];
246 Wset*   cwp;
247
248         /* storage for action table */
249
250 int     amem[ACTSIZE];          /* action table storage */
251 int*    memp = amem;            /* next free action table position */
252 int     indgo[NSTATES];         /* index to the stored goto table */
253
254         /* temporary vector, indexable by states, terms, or ntokens */
255
256 int     temp1[TEMPSIZE];        /* temporary storage, indexed by terms + ntokens or states */
257 int     lineno = 1;             /* current input line number */
258 int     fatfl = 1;              /* if on, error is fatal */
259 int     nerrors = 0;            /* number of errors */
260
261         /* statistics collection variables */
262
263 int     zzgoent;
264 int     zzgobest;
265 int     zzacent;
266 int     zzexcp;
267 int     zzclose;
268 int     zzrrconf;
269 int     zzsrconf;
270
271 int*    ggreed = lkst[0].lset;
272 int*    pgo = wsets[0].ws.lset;
273 int*    yypgo = &nontrst[0].value;
274
275 int     maxspr = 0;             /* maximum spread of any entry */
276 int     maxoff = 0;             /* maximum offset into a array */
277 int*    pmem = mem0;
278 int*    maxa;
279 int     nxdb = 0;
280 int     adb = 0;
281
282
283         /* storage for information about the nonterminals */
284
285 int**   pres[NNONTERM+2];       /* vector of pointers to productions yielding each nonterminal */
286 Lkset*  pfirst[NNONTERM+2];     /* vector of pointers to first sets for each nonterminal */
287 int     pempty[NNONTERM+1];     /* vector of nonterminals nontrivially deriving e */
288
289         /* random stuff picked out from between functions */
290
291 int     indebug = 0;
292 Wset*   zzcwp = wsets;
293 int     zzgoent = 0;
294 int     zzgobest = 0;
295 int     zzacent = 0;
296 int     zzexcp = 0;
297 int     zzclose = 0;
298 int     zzsrconf = 0;
299 int*    zzmemsz = mem0;
300 int     zzrrconf = 0;
301 int     pidebug = 0;            /* debugging flag for putitem */
302 int     gsdebug = 0;
303 int     cldebug = 0;            /* debugging flag for closure */
304 int     pkdebug = 0;
305 int     g2debug = 0;
306
307 struct
308 {
309         char*   name;
310         long    value;
311 } resrv[] =
312 {
313         "binary",       BINARY,
314         "left",         LEFT,
315         "nonassoc",     BINARY,
316         "prec",         PREC,
317         "right",        RIGHT,
318         "start",        START,
319         "term",         TERM,
320         "token",        TERM,
321         "type",         TYPEDEF,
322         "union",        UNION,
323         0,
324 };
325
326         /* define functions */
327
328 void    main(int, char**);
329 void    others(void);
330 char*   chcopy(char*, char*);
331 char*   writem(int*);
332 char*   symnam(int);
333 void    summary(void);
334 void    error(char*, ...);
335 void    aryfil(int*, int, int);
336 int     setunion(int*, int*);
337 void    prlook(Lkset*);
338 void    cpres(void);
339 void    cpfir(void);
340 int     state(int);
341 void    putitem(int*, Lkset*);
342 void    cempty(void);
343 void    stagen(void);
344 void    closure(int);
345 Lkset*  flset(Lkset*);
346 void    cleantmp(void);
347 void    intr(void);
348 void    setup(int, char**);
349 void    finact(void);
350 int     defin(int, char*);
351 void    defout(int);
352 char*   cstash(char*);
353 long    gettok(void);
354 int     fdtype(int);
355 int     chfind(int, char*);
356 void    cpyunion(void);
357 void    cpycode(void);
358 int     skipcom(void);
359 void    cpyact(int);
360 void    openup(char*, int, int, int, char*);
361 void    output(void);
362 int     apack(int*, int);
363 void    go2out(void);
364 void    go2gen(int);
365 void    precftn(int, int, int);
366 void    wract(int);
367 void    wrstate(int);
368 void    warray(char*, int*, int);
369 void    hideprod(void);
370 void    callopt(void);
371 void    gin(int);
372 void    stin(int);
373 int     nxti(void);
374 void    osummary(void);
375 void    aoutput(void);
376 void    arout(char*, int*, int);
377 int     gtnm(void);
378
379 void
380 main(int argc, char *argv[])
381 {
382
383         setup(argc, argv);      /* initialize and read productions */
384         tbitset = NWORDS(ntokens);
385         cpres();                /* make table of which productions yield a given nonterminal */
386         cempty();               /* make a table of which nonterminals can match the empty string */
387         cpfir();                /* make a table of firsts of nonterminals */
388         stagen();               /* generate the states */
389         output();               /* write the states and the tables */
390         go2out();
391         hideprod();
392         summary();
393         callopt();
394         others();
395         exits(0);
396 }
397
398 /*
399  * put out other arrays, copy the parsers
400  */
401 void
402 others(void)
403 {
404         int c, i, j;
405
406         finput = Bopen(parser, OREAD);
407         if(finput == 0)
408                 error("cannot find parser %s", parser);
409         Blethal(finput, nil);
410         warray("yyr1", levprd, nprod);
411         aryfil(temp1, nprod, 0);
412         PLOOP(1, i)
413                 temp1[i] = prdptr[i+1]-prdptr[i]-2;
414         warray("yyr2", temp1, nprod);
415
416         aryfil(temp1, nstate, -1000);
417         TLOOP(i)
418                 for(j=tstates[i]; j!=0; j=mstates[j])
419                         temp1[j] = i;
420         NTLOOP(i)
421                 for(j=ntstates[i]; j!=0; j=mstates[j])
422                         temp1[j] = -i;
423         warray("yychk", temp1, nstate);
424         warray("yydef", defact, nstate);
425
426         /* put out token translation tables */
427         /* table 1 has 0-256 */
428         aryfil(temp1, 256, 0);
429         c = 0;
430         TLOOP(i) {
431                 j = tokset[i].value;
432                 if(j >= 0 && j < 256) {
433                         if(temp1[j]) {
434                                 print("yacc bug -- cant have 2 different Ts with same value\n");
435                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
436                                 nerrors++;
437                         }
438                         temp1[j] = i;
439                         if(j > c)
440                                 c = j;
441                 }
442         }
443         warray("yytok1", temp1, c+1);
444
445         /* table 2 has PRIVATE-PRIVATE+256 */
446         aryfil(temp1, 256, 0);
447         c = 0;
448         TLOOP(i) {
449                 j = tokset[i].value - PRIVATE;
450                 if(j >= 0 && j < 256) {
451                         if(temp1[j]) {
452                                 print("yacc bug -- cant have 2 different Ts with same value\n");
453                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
454                                 nerrors++;
455                         }
456                         temp1[j] = i;
457                         if(j > c)
458                                 c = j;
459                 }
460         }
461         warray("yytok2", temp1, c+1);
462
463         /* table 3 has everything else */
464         Bprint(ftable, "long    yytok3[] =\n{\n");
465         c = 0;
466         TLOOP(i) {
467                 j = tokset[i].value;
468                 if(j >= 0 && j < 256)
469                         continue;
470                 if(j >= PRIVATE && j < 256+PRIVATE)
471                         continue;
472
473                 Bprint(ftable, "%4d,%4d,", j, i);
474                 c++;
475                 if(c%5 == 0)
476                         Bprint(ftable, "\n");
477         }
478         Bprint(ftable, "%4d\n};\n", 0);
479
480         /* copy parser text */
481         Bprint(ftable, "\n#line\t1\t\"%s\"\n", parser);
482         while((c=Bgetrune(finput)) != Beof) {
483                 if(c == '$') {
484                         if((c = Bgetrune(finput)) != 'A')
485                                 Bputrune(ftable, '$');
486                         else { /* copy actions */
487                                 faction = Bopen(actname, OREAD);
488                                 if(faction == 0)
489                                         error("cannot reopen action tempfile");
490                                 Blethal(faction, nil);
491                                 while((c=Bgetrune(faction)) != Beof)
492                                         Bputrune(ftable, c);
493                                 ZAPFILE(faction, actname);
494                                 c = Bgetrune(finput);
495                         }
496                 }
497                 Bputrune(ftable, c);
498         }
499         Bterm(ftable);
500 }
501
502 /*
503  * copies string q into p, returning next free char ptr
504  */
505 char*
506 chcopy(char* p, char* q)
507 {
508         int c;
509
510         while(c = *q) {
511                 if(c == '"')
512                         *p++ = '\\';
513                 *p++ = c;
514                 q++;
515         }
516         *p = 0;
517         return p;
518 }
519
520 /*
521  * creates output string for item pointed to by pp
522  */
523 char*
524 writem(int *pp)
525 {
526         int i,*p;
527         static char sarr[ISIZE];
528         char* q;
529
530         for(p=pp; *p>0; p++)
531                 ;
532         p = prdptr[-*p];
533         q = chcopy(sarr, nontrst[*p-NTBASE].name);
534         q = chcopy(q, ": ");
535         for(;;) {
536                 *q = ' ';
537                 p++;
538                 if(p == pp)
539                         *q = '.';
540                 q++;
541                 *q = '\0';
542                 i = *p;
543                 if(i <= 0)
544                         break;
545                 q = chcopy(q, symnam(i));
546                 if(q > &sarr[ISIZE-30])
547                         error("item too big");
548         }
549
550         /* an item calling for a reduction */
551         i = *pp;
552         if(i < 0 ) {
553                 q = chcopy(q, "    (");
554                 sprint(q, "%d)", -i);
555         }
556         return sarr;
557 }
558
559 /*
560  * return a pointer to the name of symbol i
561  */
562 char*
563 symnam(int i)
564 {
565         char* cp;
566
567         cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
568         if(*cp == ' ')
569                 cp++;
570         return cp;
571 }
572
573 /*
574  * output the summary on y.output
575  */
576 void
577 summary(void)
578 {
579
580         if(foutput != 0) {
581                 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
582                         ntokens, NTERMS, nnonter, NNONTERM);
583                 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
584                         nprod, NPROD, nstate, NSTATES);
585                 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
586                         zzsrconf, zzrrconf);
587                 Bprint(foutput, "%d/%d working sets used\n",
588                         (int)(zzcwp-wsets), WSETSIZE);
589                 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
590                         (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
591                 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
592                 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
593                 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
594                 Bprint(foutput, "%d goto entries\n", zzgoent);
595                 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
596         }
597         if(zzsrconf != 0 || zzrrconf != 0) {
598                 print("\nconflicts: ");
599                 if(zzsrconf)
600                         print("%d shift/reduce", zzsrconf);
601                 if(zzsrconf && zzrrconf)
602                         print(", ");
603                 if(zzrrconf)
604                         print("%d reduce/reduce", zzrrconf);
605                 print("\n");
606         }
607         if(ftemp != 0) {
608                 Bterm(ftemp);
609                 ftemp = 0;
610         }
611         if(fdefine != 0) {
612                 Bterm(fdefine);
613                 fdefine = 0;
614         }
615 }
616
617 /*
618  * write out error comment -- NEEDS WORK
619  */
620 void
621 error(char *s, ...)
622 {
623         va_list ap;
624
625         nerrors++;
626         va_start(ap, s);
627         fprint(2, "%s:%d: fatal error: ", infile, lineno);
628         vfprint(2, s, ap);
629         fprint(2, "\n");
630         va_end(ap);
631         if(!fatfl)
632                 return;
633         summary();
634         cleantmp();
635         exits("error");
636 }
637
638 /*
639  * set elements 0 through n-1 to c
640  */
641 void
642 aryfil(int *v, int n, int c)
643 {
644         int i;
645
646         for(i=0; i<n; i++)
647                 v[i] = c;
648 }
649
650 /*
651  * set a to the union of a and b
652  * return 1 if b is not a subset of a, 0 otherwise
653  */
654 int
655 setunion(int *a, int *b)
656 {
657         int i, x, sub;
658
659         sub = 0;
660         SETLOOP(i) {
661                 x = *a;
662                 *a |= *b;
663                 if(*a != x)
664                         sub = 1;
665                 a++;
666                 b++;
667         }
668         return sub;
669 }
670
671 void
672 prlook(Lkset* p)
673 {
674         int j, *pp;
675
676         pp = p->lset;
677         if(pp == 0)
678                 Bprint(foutput, "\tNULL");
679         else {
680                 Bprint(foutput, " { ");
681                 TLOOP(j)
682                         if(BIT(pp,j))
683                                 Bprint(foutput, "%s ", symnam(j));
684                 Bprint(foutput, "}");
685         }
686 }
687
688 /*
689  * compute an array with the beginnings of  productions yielding given nonterminals
690  * The array pres points to these lists
691  * the array pyield has the lists: the total size is only NPROD+1
692  */
693 void
694 cpres(void)
695 {
696         int c, j, i, **pmem;
697         static int *pyield[NPROD];
698
699         pmem = pyield;
700         NTLOOP(i) {
701                 c = i+NTBASE;
702                 pres[i] = pmem;
703                 fatfl = 0;      /* make undefined  symbols  nonfatal */
704                 PLOOP(0, j)
705                         if(*prdptr[j] == c)
706                                 *pmem++ =  prdptr[j]+1;
707                 if(pres[i] == pmem)
708                         error("nonterminal %s not defined!", nontrst[i].name);
709         }
710         pres[i] = pmem;
711         fatfl = 1;
712         if(nerrors) {
713                 summary();
714                 cleantmp();
715                 exits("error");
716         }
717         if(pmem != &pyield[nprod])
718                 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
719 }
720
721 /*
722  * compute an array with the first of nonterminals
723  */
724 void
725 cpfir(void)
726 {
727         int *p, **s, i, **t, ch, changes;
728
729         zzcwp = &wsets[nnonter];
730         NTLOOP(i) {
731                 aryfil(wsets[i].ws.lset, tbitset, 0);
732                 t = pres[i+1];
733                 /* initially fill the sets */
734                 for(s=pres[i]; s<t; ++s)
735                         for(p = *s; (ch = *p) > 0; ++p) {
736                                 if(ch < NTBASE) {
737                                         SETBIT(wsets[i].ws.lset, ch);
738                                         break;
739                                 }
740                                 if(!pempty[ch-NTBASE])
741                                         break;
742                         }
743         }
744
745         /* now, reflect transitivity */
746         changes = 1;
747         while(changes) {
748                 changes = 0;
749                 NTLOOP(i) {
750                         t = pres[i+1];
751                         for(s = pres[i]; s < t; ++s)
752                                 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
753                                         changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
754                                         if(!pempty[ch])
755                                                 break;
756                                 }
757                 }
758         }
759
760         NTLOOP(i)
761                 pfirst[i] = flset(&wsets[i].ws);
762         if(!indebug)
763                 return;
764         if(foutput != 0)
765                 NTLOOP(i) {
766                         Bprint(foutput, "\n%s: ", nontrst[i].name);
767                         prlook(pfirst[i]);
768                         Bprint(foutput, " %d\n", pempty[i]);
769                 }
770 }
771
772 /*
773  * sorts last state,and sees if it equals earlier ones. returns state number
774  */
775 int
776 state(int c)
777 {
778         Item *p1, *p2, *k, *l, *q1, *q2;
779         int size1, size2, i;
780
781         p1 = pstate[nstate];
782         p2 = pstate[nstate+1];
783         if(p1 == p2)
784                 return 0;       /* null state */
785         /* sort the items */
786         for(k = p2-1; k > p1; k--)      /* make k the biggest */
787                 for(l = k-1; l >= p1; --l)
788                         if(l->pitem > k->pitem) {
789                                 int *s;
790                                 Lkset *ss;
791
792                                 s = k->pitem;
793                                 k->pitem = l->pitem;
794                                 l->pitem = s;
795                                 ss = k->look;
796                                 k->look = l->look;
797                                 l->look = ss;
798                         }
799         size1 = p2 - p1;        /* size of state */
800
801         for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
802                 /* get ith state */
803                 q1 = pstate[i];
804                 q2 = pstate[i+1];
805                 size2 = q2 - q1;
806                 if(size1 != size2)
807                         continue;
808                 k = p1;
809                 for(l = q1; l < q2; l++) {
810                         if(l->pitem != k->pitem)
811                                 break;
812                         k++;
813                 }
814                 if(l != q2)
815                         continue;
816                 /* found it */
817                 pstate[nstate+1] = pstate[nstate];      /* delete last state */
818                 /* fix up lookaheads */
819                 if(nolook)
820                         return i;
821                 for(l = q1, k = p1; l < q2; ++l, ++k ) {
822                         int s;
823
824                         SETLOOP(s)
825                                 clset.lset[s] = l->look->lset[s];
826                         if(setunion(clset.lset, k->look->lset)) {
827                                 tystate[i] = MUSTDO;
828                                 /* register the new set */
829                                 l->look = flset( &clset );
830                         }
831                 }
832                 return i;
833         }
834         /* state is new */
835         if(nolook)
836                 error("yacc state/nolook error");
837         pstate[nstate+2] = p2;
838         if(nstate+1 >= NSTATES)
839                 error("too many states");
840         if(c >= NTBASE) {
841                 mstates[nstate] = ntstates[c-NTBASE];
842                 ntstates[c-NTBASE] = nstate;
843         } else {
844                 mstates[nstate] = tstates[c];
845                 tstates[c] = nstate;
846         }
847         tystate[nstate] = MUSTDO;
848         return nstate++;
849 }
850
851 void
852 putitem(int *ptr, Lkset *lptr)
853 {
854         Item *j;
855
856         if(pidebug && foutput != 0)
857                 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
858         j = pstate[nstate+1];
859         j->pitem = ptr;
860         if(!nolook)
861                 j->look = flset(lptr);
862         pstate[nstate+1] = ++j;
863         if((int*)j > zzmemsz) {
864                 zzmemsz = (int*)j;
865                 if(zzmemsz >=  &mem0[MEMSIZE])
866                         error("out of state space");
867         }
868 }
869
870 /*
871  * mark nonterminals which derive the empty string
872  * also, look for nonterminals which don't derive any token strings
873  */
874 void
875 cempty(void)
876 {
877
878         int i, *p;
879
880         /* first, use the array pempty to detect productions that can never be reduced */
881         /* set pempty to WHONOWS */
882         aryfil(pempty, nnonter+1, WHOKNOWS);
883
884         /* now, look at productions, marking nonterminals which derive something */
885 more:
886         PLOOP(0, i) {
887                 if(pempty[*prdptr[i] - NTBASE])
888                         continue;
889                 for(p = prdptr[i]+1; *p >= 0; ++p)
890                         if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
891                                 break;
892                 /* production can be derived */
893                 if(*p < 0) {
894                         pempty[*prdptr[i]-NTBASE] = OK;
895                         goto more;
896                 }
897         }
898
899         /* now, look at the nonterminals, to see if they are all OK */
900         NTLOOP(i) {
901                 /* the added production rises or falls as the start symbol ... */
902                 if(i == 0)
903                         continue;
904                 if(pempty[i] != OK) {
905                         fatfl = 0;
906                         error("nonterminal %s never derives any token string", nontrst[i].name);
907                 }
908         }
909
910         if(nerrors) {
911                 summary();
912                 cleantmp();
913                 exits("error");
914         }
915
916         /* now, compute the pempty array, to see which nonterminals derive the empty string */
917         /* set pempty to WHOKNOWS */
918         aryfil( pempty, nnonter+1, WHOKNOWS);
919
920         /* loop as long as we keep finding empty nonterminals */
921
922 again:
923         PLOOP(1, i) {
924                 /* not known to be empty */
925                 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
926                         for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
927                                 ;
928                         /* we have a nontrivially empty nonterminal */
929                         if(*p < 0) {
930                                 pempty[*prdptr[i]-NTBASE] = EMPTY;
931                                 /* got one ... try for another */
932                                 goto again;
933                         }
934                 }
935         }
936 }
937
938 /*
939  * generate the states
940  */
941 void
942 stagen(void)
943 {
944
945         int c, i, j, more;
946         Wset *p, *q;
947
948         /* initialize */
949         nstate = 0;
950
951         /* THIS IS FUNNY from the standpoint of portability
952          * it represents the magic moment when the mem0 array, which has
953          * been holding the productions, starts to hold item pointers, of a
954          * different type...
955          * someday, alloc should be used to allocate all this stuff... for now, we
956          * accept that if pointers don't fit in integers, there is a problem...
957          */
958
959         pstate[0] = pstate[1] = (Item*)mem;
960         aryfil(clset.lset, tbitset, 0);
961         putitem(prdptr[0]+1, &clset);
962         tystate[0] = MUSTDO;
963         nstate = 1;
964         pstate[2] = pstate[1];
965
966         aryfil(amem, ACTSIZE, 0);
967
968         /* now, the main state generation loop */
969         for(more=1; more;) {
970                 more = 0;
971                 SLOOP(i) {
972                         if(tystate[i] != MUSTDO)
973                                 continue;
974                         tystate[i] = DONE;
975                         aryfil(temp1, nnonter+1, 0);
976                         /* take state i, close it, and do gotos */
977                         closure(i);
978                         /* generate goto's */
979                         WSLOOP(wsets, p) {
980                                 if(p->flag)
981                                         continue;
982                                 p->flag = 1;
983                                 c = *(p->pitem);
984                                 if(c <= 1) {
985                                         if(pstate[i+1]-pstate[i] <= p-wsets)
986                                                 tystate[i] = MUSTLOOKAHEAD;
987                                         continue;
988                                 }
989                                 /* do a goto on c */
990                                 WSLOOP(p, q)
991                                         /* this item contributes to the goto */
992                                         if(c == *(q->pitem)) {
993                                                 putitem(q->pitem+1, &q->ws);
994                                                 q->flag = 1;
995                                         }
996                                 if(c < NTBASE)
997                                         state(c);       /* register new state */
998                                 else
999                                         temp1[c-NTBASE] = state(c);
1000                         }
1001                         if(gsdebug && foutput != 0) {
1002                                 Bprint(foutput, "%d: ", i);
1003                                 NTLOOP(j)
1004                                         if(temp1[j])
1005                                                 Bprint(foutput, "%s %d, ",
1006                                                 nontrst[j].name, temp1[j]);
1007                                 Bprint(foutput, "\n");
1008                         }
1009                         indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1010                         /* do some more */
1011                         more = 1;
1012                 }
1013         }
1014 }
1015
1016 /*
1017  * generate the closure of state i
1018  */
1019 void
1020 closure(int i)
1021 {
1022
1023         Wset *u, *v;
1024         Item *p, *q;
1025         int c, ch, work, k, *pi, **s, **t;
1026
1027         zzclose++;
1028
1029         /* first, copy kernel of state i to wsets */
1030         cwp = wsets;
1031         ITMLOOP(i, p, q) {
1032                 cwp->pitem = p->pitem;
1033                 cwp->flag = 1;                  /* this item must get closed */
1034                 SETLOOP(k)
1035                         cwp->ws.lset[k] = p->look->lset[k];
1036                 WSBUMP(cwp);
1037         }
1038
1039         /* now, go through the loop, closing each item */
1040         work = 1;
1041         while(work) {
1042                 work = 0;
1043                 WSLOOP(wsets, u) {
1044                         if(u->flag == 0)
1045                                 continue;
1046                         /* dot is before c */
1047                         c = *(u->pitem);
1048                         if(c < NTBASE) {
1049                                 u->flag = 0;
1050                                 /* only interesting case is where . is before nonterminal */
1051                                 continue;
1052                         }
1053
1054                         /* compute the lookahead */
1055                         aryfil(clset.lset, tbitset, 0);
1056
1057                         /* find items involving c */
1058                         WSLOOP(u, v)
1059                                 if(v->flag == 1 && *(pi=v->pitem) == c) {
1060                                         v->flag = 0;
1061                                         if(nolook)
1062                                                 continue;
1063                                         while((ch = *++pi) > 0) {
1064                                                 /* terminal symbol */
1065                                                 if(ch < NTBASE) {
1066                                                         SETBIT(clset.lset, ch);
1067                                                         break;
1068                                                 }
1069                                                 /* nonterminal symbol */
1070                                                 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1071                                                 if(!pempty[ch-NTBASE])
1072                                                         break;
1073                                         }
1074                                         if(ch <= 0)
1075                                                 setunion(clset.lset, v->ws.lset);
1076                                 }
1077
1078                         /*
1079                          * now loop over productions derived from c
1080                          * c is now nonterminal number
1081                          */
1082                         c -= NTBASE;
1083                         t = pres[c+1];
1084                         for(s = pres[c]; s < t; ++s) {
1085                                 /*
1086                                  * put these items into the closure
1087                                  * is the item there
1088                                  */
1089                                 WSLOOP(wsets, v)
1090                                         /* yes, it is there */
1091                                         if(v->pitem == *s) {
1092                                                 if(nolook)
1093                                                         goto nexts;
1094                                                 if(setunion(v->ws.lset, clset.lset))
1095                                                         v->flag = work = 1;
1096                                                 goto nexts;
1097                                         }
1098
1099                                 /*  not there; make a new entry */
1100                                 if(cwp-wsets+1 >= WSETSIZE)
1101                                         error( "working set overflow");
1102                                 cwp->pitem = *s;
1103                                 cwp->flag = 1;
1104                                 if(!nolook) {
1105                                         work = 1;
1106                                         SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1107                                 }
1108                                 WSBUMP(cwp);
1109
1110                         nexts:;
1111                         }
1112                 }
1113         }
1114
1115         /* have computed closure; flags are reset; return */
1116         if(cwp > zzcwp)
1117                 zzcwp = cwp;
1118         if(cldebug && foutput != 0) {
1119                 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1120                 WSLOOP(wsets, u) {
1121                         if(u->flag)
1122                                 Bprint(foutput, "flag set!\n");
1123                         u->flag = 0;
1124                         Bprint(foutput, "\t%s", writem(u->pitem));
1125                         prlook(&u->ws);
1126                         Bprint(foutput, "\n");
1127                 }
1128         }
1129 }
1130
1131 /*
1132  * decide if the lookahead set pointed to by p is known
1133  * return pointer to a perminent location for the set
1134  */
1135 Lkset*
1136 flset(Lkset *p)
1137 {
1138         Lkset *q;
1139         int *u, *v, *w, j;
1140
1141         for(q = &lkst[nlset]; q-- > lkst;) {
1142                 u = p->lset;
1143                 v = q->lset;
1144                 w = &v[tbitset];
1145                 while(v < w)
1146                         if(*u++ != *v++)
1147                                 goto more;
1148                 /* we have matched */
1149                 return q;
1150         more:;
1151         }
1152         /* add a new one */
1153         q = &lkst[nlset++];
1154         if(nlset >= LSETSIZE)
1155                 error("too many lookahead sets");
1156         SETLOOP(j)
1157                 q->lset[j] = p->lset[j];
1158         return q;
1159 }
1160
1161 void
1162 cleantmp(void)
1163 {
1164         ZAPFILE(faction, actname);
1165         ZAPFILE(ftemp, tempname);
1166 }
1167
1168 void
1169 intr(void)
1170 {
1171         cleantmp();
1172         exits("interrupted");
1173 }
1174
1175 void
1176 usage(void)
1177 {
1178         fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1179         exits("usage");
1180 }
1181
1182 void
1183 setup(int argc, char *argv[])
1184 {
1185         long c, t;
1186         int i, j, lev, ty, ytab, *p;
1187         int vflag, dflag, stem;
1188         char actnm[8], *stemc;
1189
1190         ytab = 0;
1191         vflag = 0;
1192         dflag = 0;
1193         stem = 0;
1194         stemc = "y";
1195         foutput = 0;
1196         fdefine = 0;
1197         fdebug = 0;
1198         ARGBEGIN{
1199         case 'v':
1200         case 'V':
1201                 vflag++;
1202                 break;
1203         case 'D':
1204                 yydebug = EARGF(usage());
1205                 break;
1206         case 'd':
1207                 dflag++;
1208                 break;
1209         case 'o':
1210                 ytab++;
1211                 ytabc = EARGF(usage());
1212                 break;
1213         case 's':
1214                 stem++;
1215                 stemc = ARGF();
1216                 break;
1217         case 'S':
1218                 parser = PARSERS;
1219                 break;
1220         default:
1221                 error("illegal option: %c", ARGC());
1222         }ARGEND
1223         openup(stemc, dflag, vflag, ytab, ytabc);
1224
1225         ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1226         faction = Bopen(actname = mktemp(tactname), OWRITE);
1227         if(ftemp == 0 || faction == 0)
1228                 error("cannot open temp file");
1229         Blethal(ftemp, nil);
1230         Blethal(faction, nil);
1231         if(argc < 1)
1232                 error("no input file");
1233
1234         infile = argv[0];
1235         finput = Bopen(infile, OREAD);
1236         if(finput == 0)
1237                 error("cannot open '%s'", argv[0]);
1238         if(fd2path(Bfildes(finput), inpath, sizeof(inpath)) == -1)
1239                 error("cannot get path for %s", infile);
1240         Blethal(finput, nil);
1241         cnamp = cnames;
1242
1243         defin(0, "$end");
1244         extval = PRIVATE;       /* tokens start in unicode 'private use' */
1245         defin(0, "error");
1246         defin(1, "$accept");
1247         defin(0, "$unk");
1248         mem = mem0;
1249         i = 0;
1250
1251         for(t = gettok(); t != MARK && t != ENDFILE;)
1252         switch(t) {
1253         case ';':
1254                 t = gettok();
1255                 break;
1256
1257         case START:
1258                 if(gettok() != IDENTIFIER)
1259                         error("bad %%start construction");
1260                 start = chfind(1, tokname);
1261                 t = gettok();
1262                 continue;
1263
1264         case TYPEDEF:
1265                 if(gettok() != TYPENAME)
1266                         error("bad syntax in %%type");
1267                 ty = numbval;
1268                 for(;;) {
1269                         t = gettok();
1270                         switch(t) {
1271                         case IDENTIFIER:
1272                                 if((t=chfind(1, tokname)) < NTBASE) {
1273                                         j = TYPE(toklev[t]);
1274                                         if(j != 0 && j != ty)
1275                                                 error("type redeclaration of token %s",
1276                                                         tokset[t].name);
1277                                         else
1278                                                 SETTYPE(toklev[t], ty);
1279                                 } else {
1280                                         j = nontrst[t-NTBASE].value;
1281                                         if(j != 0 && j != ty)
1282                                                 error("type redeclaration of nonterminal %s",
1283                                                         nontrst[t-NTBASE].name );
1284                                         else
1285                                                 nontrst[t-NTBASE].value = ty;
1286                                 }
1287                         case ',':
1288                                 continue;
1289                         case ';':
1290                                 t = gettok();
1291                         default:
1292                                 break;
1293                         }
1294                         break;
1295                 }
1296                 continue;
1297
1298         case UNION:
1299                 /* copy the union declaration to the output */
1300                 cpyunion();
1301                 t = gettok();
1302                 continue;
1303
1304         case LEFT:
1305         case BINARY:
1306         case RIGHT:
1307                 i++;
1308
1309         case TERM:
1310                 /* nonzero means new prec. and assoc. */
1311                 lev = t-TERM;
1312                 ty = 0;
1313
1314                 /* get identifiers so defined */
1315                 t = gettok();
1316
1317                 /* there is a type defined */
1318                 if(t == TYPENAME) {
1319                         ty = numbval;
1320                         t = gettok();
1321                 }
1322                 for(;;) {
1323                         switch(t) {
1324                         case ',':
1325                                 t = gettok();
1326                                 continue;
1327
1328                         case ';':
1329                                 break;
1330
1331                         case IDENTIFIER:
1332                                 j = chfind(0, tokname);
1333                                 if(j >= NTBASE)
1334                                         error("%s defined earlier as nonterminal", tokname);
1335                                 if(lev) {
1336                                         if(ASSOC(toklev[j]))
1337                                                 error("redeclaration of precedence of %s", tokname);
1338                                         SETASC(toklev[j], lev);
1339                                         SETPLEV(toklev[j], i);
1340                                 }
1341                                 if(ty) {
1342                                         if(TYPE(toklev[j]))
1343                                                 error("redeclaration of type of %s", tokname);
1344                                         SETTYPE(toklev[j],ty);
1345                                 }
1346                                 t = gettok();
1347                                 if(t == NUMBER) {
1348                                         tokset[j].value = numbval;
1349                                         if(j < ndefout && j > 3)
1350                                                 error("please define type number of %s earlier",
1351                                                         tokset[j].name);
1352                                         t = gettok();
1353                                 }
1354                                 continue;
1355                         }
1356                         break;
1357                 }
1358                 continue;
1359
1360         case LCURLY:
1361                 defout(0);
1362                 cpycode();
1363                 t = gettok();
1364                 continue;
1365
1366         default:
1367                 error("syntax error");
1368         }
1369         if(t == ENDFILE)
1370                 error("unexpected EOF before %%");
1371
1372         /* t is MARK */
1373         Bprint(ftable, "extern  int     yyerrflag;\n");
1374         Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1375         Bprint(ftable, "#define YYMAXDEPTH      150\n");
1376         Bprint(ftable, "#endif\n" );
1377         if(!ntypes) {
1378                 Bprint(ftable, "#ifndef YYSTYPE\n");
1379                 Bprint(ftable, "#define YYSTYPE int\n");
1380                 Bprint(ftable, "#endif\n");
1381         }
1382         Bprint(ftable, "YYSTYPE yylval;\n");
1383         Bprint(ftable, "YYSTYPE yyval;\n");
1384
1385         prdptr[0] = mem;
1386
1387         /* added production */
1388         *mem++ = NTBASE;
1389
1390         /* if start is 0, we will overwrite with the lhs of the first rule */
1391         *mem++ = start;
1392         *mem++ = 1;
1393         *mem++ = 0;
1394         prdptr[1] = mem;
1395         while((t=gettok()) == LCURLY)
1396                 cpycode();
1397         if(t != IDENTCOLON)
1398                 error("bad syntax on first rule");
1399
1400         if(!start)
1401                 prdptr[0][1] = chfind(1, tokname);
1402
1403         /* read rules */
1404         while(t != MARK && t != ENDFILE) {
1405                 /* process a rule */
1406                 rlines[nprod] = lineno;
1407                 if(t == '|')
1408                         *mem++ = *prdptr[nprod-1];
1409                 else
1410                         if(t == IDENTCOLON) {
1411                                 *mem = chfind(1, tokname);
1412                                 if(*mem < NTBASE)
1413                                         error("token illegal on LHS of grammar rule");
1414                                 mem++;
1415                         } else
1416                                 error("illegal rule: missing semicolon or | ?");
1417                 /* read rule body */
1418                 t = gettok();
1419
1420         more_rule:
1421                 while(t == IDENTIFIER) {
1422                         *mem = chfind(1, tokname);
1423                         if(*mem < NTBASE)
1424                                 levprd[nprod] = toklev[*mem];
1425                         mem++;
1426                         t = gettok();
1427                 }
1428                 if(t == PREC) {
1429                         if(gettok() != IDENTIFIER)
1430                                 error("illegal %%prec syntax");
1431                         j = chfind(2, tokname);
1432                         if(j >= NTBASE)
1433                                 error("nonterminal %s illegal after %%prec",
1434                                         nontrst[j-NTBASE].name);
1435                         levprd[nprod] = toklev[j];
1436                         t = gettok();
1437                 }
1438                 if(t == '=') {
1439                         levprd[nprod] |= ACTFLAG;
1440                         Bprint(faction, "\ncase %d:", nprod);
1441                         cpyact(mem-prdptr[nprod]-1);
1442                         Bprint(faction, " break;");
1443                         if((t=gettok()) == IDENTIFIER) {
1444
1445                                 /* action within rule... */
1446                                 sprint(actnm, "$$%d", nprod);
1447
1448                                 /* make it a nonterminal */
1449                                 j = chfind(1, actnm);
1450
1451                                 /*
1452                                  * the current rule will become rule number nprod+1
1453                                  * move the contents down, and make room for the null
1454                                  */
1455                                 for(p = mem; p >= prdptr[nprod]; --p)
1456                                         p[2] = *p;
1457                                 mem += 2;
1458
1459                                 /* enter null production for action */
1460                                 p = prdptr[nprod];
1461                                 *p++ = j;
1462                                 *p++ = -nprod;
1463
1464                                 /* update the production information */
1465                                 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1466                                 levprd[nprod] = ACTFLAG;
1467                                 if(++nprod >= NPROD)
1468                                         error("more than %d rules", NPROD);
1469                                 prdptr[nprod] = p;
1470
1471                                 /* make the action appear in the original rule */
1472                                 *mem++ = j;
1473
1474                                 /* get some more of the rule */
1475                                 goto more_rule;
1476                         }
1477                 }
1478
1479                 while(t == ';')
1480                         t = gettok();
1481                 *mem++ = -nprod;
1482
1483                 /* check that default action is reasonable */
1484                 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1485
1486                         /* no explicit action, LHS has value */
1487                         int tempty;
1488
1489                         tempty = prdptr[nprod][1];
1490                         if(tempty < 0)
1491                                 error("must return a value, since LHS has a type");
1492                         else
1493                                 if(tempty >= NTBASE)
1494                                         tempty = nontrst[tempty-NTBASE].value;
1495                                 else
1496                                         tempty = TYPE(toklev[tempty]);
1497                         if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1498                                 error("default action causes potential type clash");
1499                 }
1500                 nprod++;
1501                 if(nprod >= NPROD)
1502                         error("more than %d rules", NPROD);
1503                 prdptr[nprod] = mem;
1504                 levprd[nprod] = 0;
1505         }
1506
1507         /* end of all rules */
1508         defout(1);
1509
1510         finact();
1511         if(t == MARK) {
1512                 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1513                 while((c=Bgetrune(finput)) != Beof)
1514                         Bputrune(ftable, c);
1515         }
1516         Bterm(finput);
1517 }
1518
1519 /*
1520  * finish action routine
1521  */
1522 void
1523 finact(void)
1524 {
1525
1526         Bterm(faction);
1527         Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1528         Bprint(ftable, "#define YYERRCODE %d\n", 2);
1529 }
1530
1531 /*
1532  * define s to be a terminal if t=0
1533  * or a nonterminal if t=1
1534  */
1535 int
1536 defin(int nt, char *s)
1537 {
1538         int val;
1539         Rune rune;
1540
1541         val = 0;
1542         if(nt) {
1543                 nnonter++;
1544                 if(nnonter >= NNONTERM)
1545                         error("too many nonterminals, limit %d",NNONTERM);
1546                 nontrst[nnonter].name = cstash(s);
1547                 return NTBASE + nnonter;
1548         }
1549
1550         /* must be a token */
1551         ntokens++;
1552         if(ntokens >= NTERMS)
1553                 error("too many terminals, limit %d", NTERMS);
1554         tokset[ntokens].name = cstash(s);
1555
1556         /* establish value for token */
1557         /* single character literal */
1558         if(s[0] == ' ') {
1559                 val = chartorune(&rune, &s[1]);
1560                 if(s[val+1] == 0) {
1561                         val = rune;
1562                         goto out;
1563                 }
1564         }
1565
1566         /* escape sequence */
1567         if(s[0] == ' ' && s[1] == '\\') {
1568                 if(s[3] == 0) {
1569                         /* single character escape sequence */
1570                         switch(s[2]) {
1571                         case 'n':       val = '\n'; break;
1572                         case 'r':       val = '\r'; break;
1573                         case 'b':       val = '\b'; break;
1574                         case 't':       val = '\t'; break;
1575                         case 'f':       val = '\f'; break;
1576                         case '\'':      val = '\''; break;
1577                         case '"':       val = '"'; break;
1578                         case '\\':      val = '\\'; break;
1579                         default:        error("invalid escape");
1580                         }
1581                         goto out;
1582                 }
1583
1584                 /* \nnn sequence */
1585                 if(s[2] >= '0' && s[2] <= '7') {
1586                         if(s[3] < '0' ||
1587                            s[3] > '7' ||
1588                            s[4] < '0' ||
1589                            s[4] > '7' ||
1590                            s[5] != 0)
1591                                 error("illegal \\nnn construction");
1592                         val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1593                         if(val == 0)
1594                                 error("'\\000' is illegal");
1595                         goto out;
1596                 }
1597                 error("unknown escape");
1598         }
1599         val = extval++;
1600
1601 out:
1602         tokset[ntokens].value = val;
1603         toklev[ntokens] = 0;
1604         return ntokens;
1605 }
1606
1607 /*
1608  * write out the defines (at the end of the declaration section)
1609  */
1610 void
1611 defout(int last)
1612 {
1613         int i, c;
1614         char sar[NAMESIZE+10];
1615
1616         for(i=ndefout; i<=ntokens; i++) {
1617                 /* non-literals */
1618                 c = tokset[i].name[0];
1619                 if(c != ' ' && c != '$') {
1620                         Bprint(ftable, "#define %s      %d\n",
1621                                 tokset[i].name, tokset[i].value);
1622                         if(fdefine)
1623                                 Bprint(fdefine, "#define\t%s\t%d\n",
1624                                         tokset[i].name, tokset[i].value);
1625                 }
1626         }
1627         ndefout = ntokens+1;
1628         if(last && fdebug) {
1629                 Bprint(fdebug, "char*   yytoknames[] =\n{\n");
1630                 TLOOP(i) {
1631                         if(tokset[i].name) {
1632                                 chcopy(sar, tokset[i].name);
1633                                 Bprint(fdebug, "\t\"%s\",\n", sar);
1634                                 continue;
1635                         }
1636                         Bprint(fdebug, "\t0,\n");
1637                 }
1638                 Bprint(fdebug, "};\n");
1639         }
1640 }
1641
1642 char*
1643 cstash(char *s)
1644 {
1645         char *temp;
1646
1647         temp = cnamp;
1648         do {
1649                 if(cnamp >= &cnames[cnamsz])
1650                         error("too many characters in id's and literals");
1651                 else
1652                         *cnamp++ = *s;
1653         } while(*s++);
1654         return temp;
1655 }
1656
1657 long
1658 gettok(void)
1659 {
1660         long c;
1661         Rune rune;
1662         int i, base, match, reserve;
1663         static int peekline;
1664
1665 begin:
1666         reserve = 0;
1667         lineno += peekline;
1668         peekline = 0;
1669         c = Bgetrune(finput);
1670         while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1671                 if(c == '\n')
1672                         lineno++;
1673                 c = Bgetrune(finput);
1674         }
1675
1676         /* skip comment */
1677         if(c == '/') {
1678                 lineno += skipcom();
1679                 goto begin;
1680         }
1681         switch(c) {
1682         case Beof:
1683                 return ENDFILE;
1684
1685         case '{':
1686                 Bungetrune(finput);
1687                 return '=';
1688
1689         case '<':
1690                 /* get, and look up, a type name (union member name) */
1691                 i = 0;
1692                 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1693                         rune = c;
1694                         c = runetochar(&tokname[i], &rune);
1695                         if(i < NAMESIZE)
1696                                 i += c;
1697                 }
1698                 if(c != '>')
1699                         error("unterminated < ... > clause");
1700                 tokname[i] = 0;
1701                 for(i=1; i<=ntypes; i++)
1702                         if(!strcmp(typeset[i], tokname)) {
1703                                 numbval = i;
1704                                 return TYPENAME;
1705                         }
1706                 ntypes++;
1707                 numbval = ntypes;
1708                 typeset[numbval] = cstash(tokname);
1709                 return TYPENAME;
1710
1711         case '"':
1712         case '\'':
1713                 match = c;
1714                 tokname[0] = ' ';
1715                 i = 1;
1716                 for(;;) {
1717                         c = Bgetrune(finput);
1718                         if(c == '\n' || c <= 0)
1719                                 error("illegal or missing ' or \"" );
1720                         if(c == '\\') {
1721                                 tokname[i] = '\\';
1722                                 if(i < NAMESIZE)
1723                                         i++;
1724                                 c = Bgetrune(finput);
1725                         } else
1726                                 if(c == match)
1727                                         break;
1728                         rune = c;
1729                         c = runetochar(&tokname[i], &rune);
1730                         if(i < NAMESIZE)
1731                                 i += c;
1732                 }
1733                 break;
1734
1735         case '%':
1736         case '\\':
1737                 switch(c = Bgetrune(finput)) {
1738                 case '0':       return TERM;
1739                 case '<':       return LEFT;
1740                 case '2':       return BINARY;
1741                 case '>':       return RIGHT;
1742                 case '%':
1743                 case '\\':      return MARK;
1744                 case '=':       return PREC;
1745                 case '{':       return LCURLY;
1746                 default:        reserve = 1;
1747                 }
1748
1749         default:
1750                 /* number */
1751                 if(isdigit(c)) {
1752                         numbval = c-'0';
1753                         base = (c=='0')? 8: 10;
1754                         for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1755                                 numbval = numbval*base + (c-'0');
1756                         Bungetrune(finput);
1757                         return NUMBER;
1758                 }
1759                 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1760                         i = 0;
1761                         while(islower(c) || isupper(c) || isdigit(c) ||
1762                             c == '-' || c=='_' || c=='.' || c=='$') {
1763                                 if(reserve && isupper(c))
1764                                         c += 'a'-'A';
1765                                 rune = c;
1766                                 c = runetochar(&tokname[i], &rune);
1767                                 if(i < NAMESIZE)
1768                                         i += c;
1769                                 c = Bgetrune(finput);
1770                         }
1771                 } else
1772                         return c;
1773                 Bungetrune(finput);
1774         }
1775         tokname[i] = 0;
1776
1777         /* find a reserved word */
1778         if(reserve) {
1779                 for(c=0; resrv[c].name; c++)
1780                         if(strcmp(tokname, resrv[c].name) == 0)
1781                                 return resrv[c].value;
1782                 error("invalid escape, or illegal reserved word: %s", tokname);
1783         }
1784
1785         /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1786         c = Bgetrune(finput);
1787         while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1788                 if(c == '\n')
1789                         peekline++;
1790                 /* look for comments */
1791                 if(c == '/')
1792                         peekline += skipcom();
1793                 c = Bgetrune(finput);
1794         }
1795         if(c == ':')
1796                 return IDENTCOLON;
1797         Bungetrune(finput);
1798         return IDENTIFIER;
1799 }
1800
1801 /*
1802  * determine the type of a symbol
1803  */
1804 int
1805 fdtype(int t)
1806 {
1807         int v;
1808
1809         if(t >= NTBASE)
1810                 v = nontrst[t-NTBASE].value;
1811         else
1812                 v = TYPE(toklev[t]);
1813         if(v <= 0)
1814                 error("must specify type for %s", (t>=NTBASE)?
1815                         nontrst[t-NTBASE].name: tokset[t].name);
1816         return v;
1817 }
1818
1819 int
1820 chfind(int t, char *s)
1821 {
1822         int i;
1823
1824         if(s[0] == ' ')
1825                 t = 0;
1826         TLOOP(i)
1827                 if(!strcmp(s, tokset[i].name))
1828                         return i;
1829         NTLOOP(i)
1830                 if(!strcmp(s, nontrst[i].name))
1831                         return NTBASE+i;
1832
1833         /* cannot find name */
1834         if(t > 1)
1835                 error("%s should have been defined earlier", s);
1836         return defin(t, s);
1837 }
1838
1839 /*
1840  * copy the union declaration to the output, and the define file if present
1841  */
1842 void
1843 cpyunion(void)
1844 {
1845         long c;
1846         int level;
1847
1848         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1849         Bprint(ftable, "typedef union ");
1850         if(fdefine != 0)
1851                 Bprint(fdefine, "\ntypedef union ");
1852
1853         level = 0;
1854         for(;;) {
1855                 if((c=Bgetrune(finput)) == Beof)
1856                         error("EOF encountered while processing %%union");
1857                 Bputrune(ftable, c);
1858                 if(fdefine != 0)
1859                         Bputrune(fdefine, c);
1860                 switch(c) {
1861                 case '\n':
1862                         lineno++;
1863                         break;
1864                 case '{':
1865                         level++;
1866                         break;
1867                 case '}':
1868                         level--;
1869
1870                         /* we are finished copying */
1871                         if(level == 0) {
1872                                 Bprint(ftable, " YYSTYPE;\n");
1873                                 if(fdefine != 0)
1874                                         Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1875                                 return;
1876                         }
1877                 }
1878         }
1879 }
1880
1881 /*
1882  * copies code between \{ and \}
1883  */
1884 void
1885 cpycode(void)
1886 {
1887
1888         long c;
1889
1890         c = Bgetrune(finput);
1891         if(c == '\n') {
1892                 c = Bgetrune(finput);
1893                 lineno++;
1894         }
1895         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1896         while(c != Beof) {
1897                 if(c == '\\') {
1898                         if((c=Bgetrune(finput)) == '}')
1899                                 return;
1900                         Bputc(ftable, '\\');
1901                 }
1902                 if(c == '%') {
1903                         if((c=Bgetrune(finput)) == '}')
1904                                 return;
1905                         Bputc(ftable, '%');
1906                 }
1907                 Bputrune(ftable, c);
1908                 if(c == '\n')
1909                         lineno++;
1910                 c = Bgetrune(finput);
1911         }
1912         error("eof before %%}");
1913 }
1914
1915 /*
1916  * skip over comments
1917  * skipcom is called after reading a '/'
1918  */
1919 int
1920 skipcom(void)
1921 {
1922         long c;
1923         int i;
1924
1925         /* i is the number of lines skipped */
1926         i = 0;
1927         c = Bgetrune(finput);
1928         if(c == '/'){                   /* C++ //: skip to end of line */
1929                 while((c = Bgetrune(finput)) != Beof)
1930                         if(c == '\n')
1931                                 return 1;
1932         }else if(c == '*'){             /* normal C comment */
1933                 while((c = Bgetrune(finput)) != Beof) {
1934                         while(c == '*')
1935                                 if((c = Bgetrune(finput)) == '/')
1936                                         return i;
1937                         if(c == '\n')
1938                                 i++;
1939                 }
1940         }else
1941                 error("illegal comment");
1942
1943         error("EOF inside comment");
1944         return 0;
1945 }
1946
1947 /*
1948  * copy C action to the next ; or closing }
1949  */
1950 void
1951 cpyact(int offset)
1952 {
1953         long c;
1954         int brac, match, j, s, fnd, tok;
1955
1956         Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1957         brac = 0;
1958
1959 loop:
1960         c = Bgetrune(finput);
1961 swt:
1962         switch(c) {
1963         case ';':
1964                 if(brac == 0) {
1965                         Bputrune(faction, c);
1966                         return;
1967                 }
1968                 goto lcopy;
1969
1970         case '{':
1971                 brac++;
1972                 goto lcopy;
1973
1974         case '$':
1975                 s = 1;
1976                 tok = -1;
1977                 c = Bgetrune(finput);
1978
1979                 /* type description */
1980                 if(c == '<') {
1981                         Bungetrune(finput);
1982                         if(gettok() != TYPENAME)
1983                                 error("bad syntax on $<ident> clause");
1984                         tok = numbval;
1985                         c = Bgetrune(finput);
1986                 }
1987                 if(c == '$') {
1988                         Bprint(faction, "yyval");
1989
1990                         /* put out the proper tag... */
1991                         if(ntypes) {
1992                                 if(tok < 0)
1993                                         tok = fdtype(*prdptr[nprod]);
1994                                 Bprint(faction, ".%s", typeset[tok]);
1995                         }
1996                         goto loop;
1997                 }
1998                 if(c == '-') {
1999                         s = -s;
2000                         c = Bgetrune(finput);
2001                 }
2002                 if(isdigit(c)) {
2003                         j = 0;
2004                         while(isdigit(c)) {
2005                                 j = j*10 + (c-'0');
2006                                 c = Bgetrune(finput);
2007                         }
2008                         Bungetrune(finput);
2009                         j = j*s - offset;
2010                         if(j > 0)
2011                                 error("Illegal use of $%d", j+offset);
2012
2013                 dollar:
2014                         Bprint(faction, "yypt[-%d].yyv", -j);
2015
2016                         /* put out the proper tag */
2017                         if(ntypes) {
2018                                 if(j+offset <= 0 && tok < 0)
2019                                         error("must specify type of $%d", j+offset);
2020                                 if(tok < 0)
2021                                         tok = fdtype(prdptr[nprod][j+offset]);
2022                                 Bprint(faction, ".%s", typeset[tok]);
2023                         }
2024                         goto loop;
2025                 }
2026                 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2027                         int tok; /* tok used oustide for type info */
2028
2029                         /* look for $name */
2030                         Bungetrune(finput);
2031                         if(gettok() != IDENTIFIER)
2032                                 error("$ must be followed by an identifier");
2033                         tok = chfind(2, tokname);
2034                         if((c = Bgetrune(finput)) != '#') {
2035                                 Bungetrune(finput);
2036                                 fnd = -1;
2037                         } else
2038                                 if(gettok() != NUMBER) {
2039                                         error("# must be followed by number");
2040                                         fnd = -1;
2041                                 } else
2042                                         fnd = numbval;
2043                         for(j=1; j<=offset; ++j)
2044                                 if(tok == prdptr[nprod][j]) {
2045                                         if(--fnd <= 0) {
2046                                                 j -= offset;
2047                                                 goto dollar;
2048                                         }
2049                                 }
2050                         error("$name or $name#number not found");
2051                 }
2052                 Bputc(faction, '$');
2053                 if(s < 0 )
2054                         Bputc(faction, '-');
2055                 goto swt;
2056
2057         case '}':
2058                 brac--;
2059                 if(brac)
2060                         goto lcopy;
2061                 Bputrune(faction, c);
2062                 return;
2063
2064         case '/':
2065                 /* look for comments */
2066                 Bputrune(faction, c);
2067                 c = Bgetrune(finput);
2068                 if(c != '*')
2069                         goto swt;
2070
2071                 /* it really is a comment */
2072                 Bputrune(faction, c);
2073                 c = Bgetrune(finput);
2074                 while(c >= 0) {
2075                         while(c == '*') {
2076                                 Bputrune(faction, c);
2077                                 if((c=Bgetrune(finput)) == '/')
2078                                         goto lcopy;
2079                         }
2080                         Bputrune(faction, c);
2081                         if(c == '\n')
2082                                 lineno++;
2083                         c = Bgetrune(finput);
2084                 }
2085                 error("EOF inside comment");
2086
2087         case '\'':
2088                 /* character constant */
2089                 match = '\'';
2090                 goto string;
2091
2092         case '"':
2093                 /* character string */
2094                 match = '"';
2095
2096         string:
2097                 Bputrune(faction, c);
2098                 while(c = Bgetrune(finput)) {
2099                         if(c == '\\') {
2100                                 Bputrune(faction, c);
2101                                 c = Bgetrune(finput);
2102                                 if(c == '\n')
2103                                         lineno++;
2104                         } else {
2105                                 if(c == match)
2106                                         goto lcopy;
2107                                 if(c == '\n')
2108                                         error("newline in string or char. const.");
2109                         }
2110                         Bputrune(faction, c);
2111                 }
2112                 error("EOF in string or character constant");
2113
2114         case Beof:
2115                 error("action does not terminate");
2116
2117         case '\n':
2118                 lineno++;
2119                 goto lcopy;
2120         }
2121
2122 lcopy:
2123         Bputrune(faction, c);
2124         goto loop;
2125 }
2126
2127 void
2128 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2129 {
2130         char buf[256];
2131
2132         if(vflag) {
2133                 snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2134                 foutput = Bopen(buf, OWRITE);
2135                 if(foutput == 0)
2136                         error("cannot open %s", buf);
2137                 Blethal(foutput, nil);
2138         }
2139         if(yydebug) {
2140                 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2141                 if((fdebug = Bopen(buf, OWRITE)) == 0)
2142                         error("can't open %s", buf);
2143         }
2144         if(dflag) {
2145                 snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2146                 fdefine = Bopen(buf, OWRITE);
2147                 if(fdefine == 0)
2148                         error("can't create %s", buf);
2149                 Blethal(fdefine, nil);
2150         }
2151         if(ytab == 0)
2152                 snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2153         else
2154                 strecpy(buf, buf+sizeof buf, ytabc);
2155         ftable = Bopen(buf, OWRITE);
2156         if(ftable == 0)
2157                 error("cannot open table file %s", buf);
2158         Blethal(ftable, nil);
2159 }
2160
2161 /*
2162  * print the output for the states
2163  */
2164 void
2165 output(void)
2166 {
2167         int i, k, c;
2168         Wset *u, *v;
2169
2170         Bprint(ftable, "short   yyexca[] =\n{");
2171         if(fdebug)
2172                 Bprint(fdebug, "char*   yystates[] =\n{\n");
2173
2174         /* output the stuff for state i */
2175         SLOOP(i) {
2176                 nolook = tystate[i]!=MUSTLOOKAHEAD;
2177                 closure(i);
2178
2179                 /* output actions */
2180                 nolook = 1;
2181                 aryfil(temp1, ntokens+nnonter+1, 0);
2182                 WSLOOP(wsets, u) {
2183                         c = *(u->pitem);
2184                         if(c > 1 && c < NTBASE && temp1[c] == 0) {
2185                                 WSLOOP(u, v)
2186                                         if(c == *(v->pitem))
2187                                                 putitem(v->pitem+1, (Lkset*)0);
2188                                 temp1[c] = state(c);
2189                         } else
2190                                 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2191                                         temp1[c+ntokens] = amem[indgo[i]+c];
2192                 }
2193                 if(i == 1)
2194                         temp1[1] = ACCEPTCODE;
2195
2196                 /* now, we have the shifts; look at the reductions */
2197                 lastred = 0;
2198                 WSLOOP(wsets, u) {
2199                         c = *u->pitem;
2200
2201                         /* reduction */
2202                         if(c <= 0) {
2203                                 lastred = -c;
2204                                 TLOOP(k)
2205                                         if(BIT(u->ws.lset, k)) {
2206                                                 if(temp1[k] == 0)
2207                                                         temp1[k] = c;
2208                                                 else
2209                                                 if(temp1[k] < 0) { /* reduce/reduce conflict */
2210                                                         if(foutput)
2211                                                                 Bprint(foutput,
2212                                                                         "\n%d: reduce/reduce conflict"
2213                                                                         " (red'ns %d and %d ) on %s",
2214                                                                         i, -temp1[k], lastred,
2215                                                                         symnam(k));
2216                                                         if(-temp1[k] > lastred)
2217                                                                 temp1[k] = -lastred;
2218                                                         zzrrconf++;
2219                                                 } else
2220                                                         /* potential shift/reduce conflict */
2221                                                         precftn( lastred, k, i );
2222                                         }
2223                                 }
2224                 }
2225                 wract(i);
2226         }
2227
2228         if(fdebug)
2229                 Bprint(fdebug, "};\n");
2230         Bprint(ftable, "};\n");
2231         Bprint(ftable, "#define YYNPROD %d\n", nprod);
2232         Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2233         if(yydebug)
2234                 Bprint(ftable, "#define yydebug %s\n", yydebug);
2235 }
2236
2237 /*
2238  * pack state i from temp1 into amem
2239  */
2240 int
2241 apack(int *p, int n)
2242 {
2243         int *pp, *qq, *rr, off, *q, *r;
2244
2245         /* we don't need to worry about checking because
2246          * we will only look at entries known to be there...
2247          * eliminate leading and trailing 0's
2248          */
2249
2250         q = p+n;
2251         for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2252                 ;
2253         /* no actions */
2254         if(pp > q)
2255                 return 0;
2256         p = pp;
2257
2258         /* now, find a place for the elements from p to q, inclusive */
2259         r = &amem[ACTSIZE-1];
2260         for(rr = amem; rr <= r; rr++, off++) {
2261                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2262                         if(*pp != 0)
2263                                 if(*pp != *qq && *qq != 0)
2264                                         goto nextk;
2265
2266                 /* we have found an acceptable k */
2267                 if(pkdebug && foutput != 0)
2268                         Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2269                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2270                         if(*pp) {
2271                                 if(qq > r)
2272                                         error("action table overflow");
2273                                 if(qq > memp)
2274                                         memp = qq;
2275                                 *qq = *pp;
2276                         }
2277                 if(pkdebug && foutput != 0)
2278                         for(pp = amem; pp <= memp; pp += 10) {
2279                                 Bprint(foutput, "\t");
2280                                 for(qq = pp; qq <= pp+9; qq++)
2281                                         Bprint(foutput, "%d ", *qq);
2282                                 Bprint(foutput, "\n");
2283                         }
2284                 return(off);
2285         nextk:;
2286         }
2287         error("no space in action table");
2288         return 0;
2289 }
2290
2291 /*
2292  * output the gotos for the nontermninals
2293  */
2294 void
2295 go2out(void)
2296 {
2297         int i, j, k, best, count, cbest, times;
2298
2299         /* mark begining of gotos */
2300         Bprint(ftemp, "$\n");
2301         for(i = 1; i <= nnonter; i++) {
2302                 go2gen(i);
2303
2304                 /* find the best one to make default */
2305                 best = -1;
2306                 times = 0;
2307
2308                 /* is j the most frequent */
2309                 for(j = 0; j <= nstate; j++) {
2310                         if(tystate[j] == 0)
2311                                 continue;
2312                         if(tystate[j] == best)
2313                                 continue;
2314
2315                         /* is tystate[j] the most frequent */
2316                         count = 0;
2317                         cbest = tystate[j];
2318                         for(k = j; k <= nstate; k++)
2319                                 if(tystate[k] == cbest)
2320                                         count++;
2321                         if(count > times) {
2322                                 best = cbest;
2323                                 times = count;
2324                         }
2325                 }
2326
2327                 /* best is now the default entry */
2328                 zzgobest += times-1;
2329                 for(j = 0; j <= nstate; j++)
2330                         if(tystate[j] != 0 && tystate[j] != best) {
2331                                 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2332                                 zzgoent++;
2333                         }
2334
2335                 /* now, the default */
2336                 if(best == -1)
2337                         best = 0;
2338                 zzgoent++;
2339                 Bprint(ftemp, "%d\n", best);
2340         }
2341 }
2342
2343 /*
2344  * output the gotos for nonterminal c
2345  */
2346 void
2347 go2gen(int c)
2348 {
2349         int i, work, cc;
2350         Item *p, *q;
2351
2352
2353         /* first, find nonterminals with gotos on c */
2354         aryfil(temp1, nnonter+1, 0);
2355         temp1[c] = 1;
2356         work = 1;
2357         while(work) {
2358                 work = 0;
2359                 PLOOP(0, i)
2360
2361                         /* cc is a nonterminal */
2362                         if((cc=prdptr[i][1]-NTBASE) >= 0)
2363                                 /* cc has a goto on c */
2364                                 if(temp1[cc] != 0) {
2365
2366                                         /* thus, the left side of production i does too */
2367                                         cc = *prdptr[i]-NTBASE;
2368                                         if(temp1[cc] == 0) {
2369                                                   work = 1;
2370                                                   temp1[cc] = 1;
2371                                         }
2372                                 }
2373         }
2374
2375         /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2376         if(g2debug && foutput != 0) {
2377                 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2378                 NTLOOP(i)
2379                         if(temp1[i])
2380                                 Bprint(foutput, "%s ", nontrst[i].name);
2381                 Bprint(foutput, "\n");
2382         }
2383
2384         /* now, go through and put gotos into tystate */
2385         aryfil(tystate, nstate, 0);
2386         SLOOP(i)
2387                 ITMLOOP(i, p, q)
2388                         if((cc = *p->pitem) >= NTBASE)
2389                                 /* goto on c is possible */
2390                                 if(temp1[cc-NTBASE]) {
2391                                         tystate[i] = amem[indgo[i]+c];
2392                                         break;
2393                                 }
2394 }
2395
2396 /*
2397  * decide a shift/reduce conflict by precedence.
2398  * r is a rule number, t a token number
2399  * the conflict is in state s
2400  * temp1[t] is changed to reflect the action
2401  */
2402 void
2403 precftn(int r, int t, int s)
2404 {
2405         int lp, lt, action;
2406
2407         lp = levprd[r];
2408         lt = toklev[t];
2409         if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2410
2411                 /* conflict */
2412                 if(foutput != 0)
2413                         Bprint(foutput,
2414                                 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2415                                 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2416                 zzsrconf++;
2417                 return;
2418         }
2419         if(PLEVEL(lt) == PLEVEL(lp))
2420                 action = ASSOC(lt);
2421         else
2422                 if(PLEVEL(lt) > PLEVEL(lp))
2423                         action = RASC;  /* shift */
2424                 else
2425                         action = LASC;  /* reduce */
2426         switch(action) {
2427         case BASC:  /* error action */
2428                 temp1[t] = ERRCODE;
2429                 break;
2430         case LASC:  /* reduce */
2431                 temp1[t] = -r;
2432                 break;
2433         }
2434 }
2435
2436 /*
2437  * output state i
2438  * temp1 has the actions, lastred the default
2439  */
2440 void
2441 wract(int i)
2442 {
2443         int p, p0, p1, ntimes, tred, count, j, flag;
2444
2445         /* find the best choice for lastred */
2446         lastred = 0;
2447         ntimes = 0;
2448         TLOOP(j) {
2449                 if(temp1[j] >= 0)
2450                         continue;
2451                 if(temp1[j]+lastred == 0)
2452                         continue;
2453                 /* count the number of appearances of temp1[j] */
2454                 count = 0;
2455                 tred = -temp1[j];
2456                 levprd[tred] |= REDFLAG;
2457                 TLOOP(p)
2458                         if(temp1[p]+tred == 0)
2459                                 count++;
2460                 if(count > ntimes) {
2461                         lastred = tred;
2462                         ntimes = count;
2463                 }
2464         }
2465
2466         /*
2467          * for error recovery, arrange that, if there is a shift on the
2468          * error recovery token, `error', that the default be the error action
2469          */
2470         if(temp1[2] > 0)
2471                 lastred = 0;
2472
2473         /* clear out entries in temp1 which equal lastred */
2474         TLOOP(p)
2475                 if(temp1[p]+lastred == 0)
2476                         temp1[p] = 0;
2477
2478         wrstate(i);
2479         defact[i] = lastred;
2480         flag = 0;
2481         TLOOP(p0)
2482                 if((p1=temp1[p0]) != 0) {
2483                         if(p1 < 0) {
2484                                 p1 = -p1;
2485                                 goto exc;
2486                         }
2487                         if(p1 == ACCEPTCODE) {
2488                                 p1 = -1;
2489                                 goto exc;
2490                         }
2491                         if(p1 == ERRCODE) {
2492                                 p1 = 0;
2493                         exc:
2494                                 if(flag++ == 0)
2495                                         Bprint(ftable, "-1, %d,\n", i);
2496                                 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2497                                 zzexcp++;
2498                                 continue;
2499                         }
2500                         Bprint(ftemp, "%d,%d,", p0, p1);
2501                         zzacent++;
2502                 }
2503         if(flag) {
2504                 defact[i] = -2;
2505                 Bprint(ftable, "\t-2, %d,\n", lastred);
2506         }
2507         Bprint(ftemp, "\n");
2508 }
2509
2510 /*
2511  * writes state i
2512  */
2513 void
2514 wrstate(int i)
2515 {
2516         int j0, j1;
2517         Item *pp, *qq;
2518         Wset *u;
2519
2520         if(fdebug) {
2521                 if(lastred) {
2522                         Bprint(fdebug, "        0, /*%d*/\n", i);
2523                 } else {
2524                         Bprint(fdebug, "        \"");
2525                         ITMLOOP(i, pp, qq)
2526                                 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2527                         if(tystate[i] == MUSTLOOKAHEAD)
2528                                 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2529                                         if(*u->pitem < 0)
2530                                                 Bprint(fdebug, "%s\\n", writem(u->pitem));
2531                         Bprint(fdebug, "\", /*%d*/\n", i);
2532                 }
2533         }
2534         if(foutput == 0)
2535                 return;
2536         Bprint(foutput, "\nstate %d\n", i);
2537         ITMLOOP(i, pp, qq)
2538                 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2539         if(tystate[i] == MUSTLOOKAHEAD)
2540                 /* print out empty productions in closure */
2541                 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2542                         if(*u->pitem < 0)
2543                                 Bprint(foutput, "\t%s\n", writem(u->pitem));
2544
2545         /* check for state equal to another */
2546         TLOOP(j0)
2547                 if((j1=temp1[j0]) != 0) {
2548                         Bprint(foutput, "\n\t%s  ", symnam(j0));
2549                         /* shift, error, or accept */
2550                         if(j1 > 0) {
2551                                 if(j1 == ACCEPTCODE)
2552                                         Bprint(foutput,  "accept");
2553                                 else
2554                                         if(j1 == ERRCODE)
2555                                                 Bprint(foutput, "error");
2556                                         else
2557                                                 Bprint(foutput, "shift %d", j1);
2558                         } else
2559                                 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2560                 }
2561
2562         /* output the final production */
2563         if(lastred)
2564                 Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2565                         lastred, rlines[lastred]);
2566         else
2567                 Bprint(foutput, "\n\t.  error\n\n");
2568
2569         /* now, output nonterminal actions */
2570         j1 = ntokens;
2571         for(j0 = 1; j0 <= nnonter; j0++) {
2572                 j1++;
2573                 if(temp1[j1])
2574                         Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2575         }
2576 }
2577
2578 void
2579 warray(char *s, int *v, int n)
2580 {
2581         int i;
2582
2583         Bprint(ftable, "short   %s[] =\n{", s);
2584         for(i=0;;) {
2585                 if(i%10 == 0)
2586                         Bprint(ftable, "\n");
2587                 Bprint(ftable, "%4d", v[i]);
2588                 i++;
2589                 if(i >= n) {
2590                         Bprint(ftable, "\n};\n");
2591                         break;
2592                 }
2593                 Bprint(ftable, ",");
2594         }
2595 }
2596
2597 /*
2598  * in order to free up the mem and amem arrays for the optimizer,
2599  * and still be able to output yyr1, etc., after the sizes of
2600  * the action array is known, we hide the nonterminals
2601  * derived by productions in levprd.
2602  */
2603
2604 void
2605 hideprod(void)
2606 {
2607         int i, j;
2608
2609         j = 0;
2610         levprd[0] = 0;
2611         PLOOP(1, i) {
2612                 if(!(levprd[i] & REDFLAG)) {
2613                         j++;
2614                         if(foutput != 0)
2615                                 Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2616                 }
2617                 levprd[i] = *prdptr[i] - NTBASE;
2618         }
2619         if(j)
2620                 print("%d rules never reduced\n", j);
2621 }
2622
2623 void
2624 callopt(void)
2625 {
2626         int i, *p, j, k, *q;
2627
2628         /* read the arrays from tempfile and set parameters */
2629         finput = Bopen(tempname, OREAD);
2630         if(finput == 0)
2631                 error("optimizer cannot open tempfile");
2632         Blethal(finput, nil);
2633
2634         pgo[0] = 0;
2635         temp1[0] = 0;
2636         nstate = 0;
2637         nnonter = 0;
2638         for(;;) {
2639                 switch(gtnm()) {
2640                 case '\n':
2641                         nstate++;
2642                         pmem--;
2643                         temp1[nstate] = pmem - mem0;
2644                 case ',':
2645                         continue;
2646                 case '$':
2647                         break;
2648                 default:
2649                         error("bad tempfile");
2650                 }
2651                 break;
2652         }
2653
2654         pmem--;
2655         temp1[nstate] = yypgo[0] = pmem - mem0;
2656         for(;;) {
2657                 switch(gtnm()) {
2658                 case '\n':
2659                         nnonter++;
2660                         yypgo[nnonter] = pmem-mem0;
2661                 case ',':
2662                         continue;
2663                 case -1:
2664                         break;
2665                 default:
2666                         error("bad tempfile");
2667                 }
2668                 break;
2669         }
2670         pmem--;
2671         yypgo[nnonter--] = pmem - mem0;
2672         for(i = 0; i < nstate; i++) {
2673                 k = 32000;
2674                 j = 0;
2675                 q = mem0 + temp1[i+1];
2676                 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2677                         if(*p > j)
2678                                 j = *p;
2679                         if(*p < k)
2680                                 k = *p;
2681                 }
2682                 /* nontrivial situation */
2683                 if(k <= j) {
2684                         /* j is now the range */
2685 /*                      j -= k;                 /* call scj */
2686                         if(k > maxoff)
2687                                 maxoff = k;
2688                 }
2689                 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2690                 if(j > maxspr)
2691                         maxspr = j;
2692         }
2693
2694         /* initialize ggreed table */
2695         for(i = 1; i <= nnonter; i++) {
2696                 ggreed[i] = 1;
2697                 j = 0;
2698
2699                 /* minimum entry index is always 0 */
2700                 q = mem0 + yypgo[i+1] - 1;
2701                 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2702                         ggreed[i] += 2;
2703                         if(*p > j)
2704                                 j = *p;
2705                 }
2706                 ggreed[i] = ggreed[i] + 2*j;
2707                 if(j > maxoff)
2708                         maxoff = j;
2709         }
2710
2711         /* now, prepare to put the shift actions into the amem array */
2712         for(i = 0; i < ACTSIZE; i++)
2713                 amem[i] = 0;
2714         maxa = amem;
2715         for(i = 0; i < nstate; i++) {
2716                 if(tystate[i] == 0 && adb > 1)
2717                         Bprint(ftable, "State %d: null\n", i);
2718                 indgo[i] = YYFLAG1;
2719         }
2720         while((i = nxti()) != NOMORE)
2721                 if(i >= 0)
2722                         stin(i);
2723                 else
2724                         gin(-i);
2725
2726         /* print amem array */
2727         if(adb > 2 )
2728                 for(p = amem; p <= maxa; p += 10) {
2729                         Bprint(ftable, "%4d  ", (int)(p-amem));
2730                         for(i = 0; i < 10; ++i)
2731                                 Bprint(ftable, "%4d  ", p[i]);
2732                         Bprint(ftable, "\n");
2733                 }
2734
2735         /* write out the output appropriate to the language */
2736         aoutput();
2737         osummary();
2738         ZAPFILE(ftemp, tempname);
2739 }
2740
2741 void
2742 gin(int i)
2743 {
2744         int *p, *r, *s, *q1, *q2;
2745
2746         /* enter gotos on nonterminal i into array amem */
2747         ggreed[i] = 0;
2748
2749         q2 = mem0+ yypgo[i+1] - 1;
2750         q1 = mem0 + yypgo[i];
2751
2752         /* now, find amem place for it */
2753         for(p = amem; p < &amem[ACTSIZE]; p++) {
2754                 if(*p)
2755                         continue;
2756                 for(r = q1; r < q2; r += 2) {
2757                         s = p + *r + 1;
2758                         if(*s)
2759                                 goto nextgp;
2760                         if(s > maxa)
2761                                 if((maxa = s) > &amem[ACTSIZE])
2762                                         error("a array overflow");
2763                 }
2764                 /* we have found amem spot */
2765                 *p = *q2;
2766                 if(p > maxa)
2767                         if((maxa = p) > &amem[ACTSIZE])
2768                                 error("a array overflow");
2769                 for(r = q1; r < q2; r += 2) {
2770                         s = p + *r + 1;
2771                         *s = r[1];
2772                 }
2773                 pgo[i] = p-amem;
2774                 if(adb > 1)
2775                         Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2776                 return;
2777
2778         nextgp:;
2779         }
2780         error("cannot place goto %d\n", i);
2781 }
2782
2783 void
2784 stin(int i)
2785 {
2786         int *r, *s, n, flag, j, *q1, *q2;
2787
2788         tystate[i] = 0;
2789
2790         /* enter state i into the amem array */
2791         q2 = mem0+temp1[i+1];
2792         q1 = mem0+temp1[i];
2793         /* find an acceptable place */
2794         for(n = -maxoff; n < ACTSIZE; n++) {
2795                 flag = 0;
2796                 for(r = q1; r < q2; r += 2) {
2797                         if((s = *r + n + amem) < amem)
2798                                 goto nextn;
2799                         if(*s == 0)
2800                                 flag++;
2801                         else
2802                                 if(*s != r[1])
2803                                         goto nextn;
2804                 }
2805
2806                 /* check that the position equals another only if the states are identical */
2807                 for(j=0; j<nstate; j++) {
2808                         if(indgo[j] == n) {
2809
2810                                 /* we have some disagreement */
2811                                 if(flag)
2812                                         goto nextn;
2813                                 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2814
2815                                         /* states are equal */
2816                                         indgo[i] = n;
2817                                         if(adb > 1)
2818                                                 Bprint(ftable,
2819                                                 "State %d: entry at %d equals state %d\n",
2820                                                 i, n, j);
2821                                         return;
2822                                 }
2823
2824                                 /* we have some disagreement */
2825                                 goto nextn;
2826                         }
2827                 }
2828
2829                 for(r = q1; r < q2; r += 2) {
2830                         if((s = *r+n+amem) >= &amem[ACTSIZE])
2831                                 error("out of space in optimizer a array");
2832                         if(s > maxa)
2833                                 maxa = s;
2834                         if(*s != 0 && *s != r[1])
2835                                 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2836                         *s = r[1];
2837                 }
2838                 indgo[i] = n;
2839                 if(adb > 1)
2840                         Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2841                 return;
2842         nextn:;
2843         }
2844         error("Error; failure to place state %d\n", i);
2845 }
2846
2847 /*
2848  * finds the next i
2849  */
2850 int
2851 nxti(void)
2852 {
2853         int i, max, maxi;
2854
2855         max = 0;
2856         maxi = 0;
2857         for(i = 1; i <= nnonter; i++)
2858                 if(ggreed[i] >= max) {
2859                         max = ggreed[i];
2860                         maxi = -i;
2861                 }
2862         for(i = 0; i < nstate; ++i)
2863                 if(tystate[i] >= max) {
2864                         max = tystate[i];
2865                         maxi = i;
2866                 }
2867         if(nxdb)
2868                 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2869         if(max == 0)
2870                 return NOMORE;
2871         return maxi;
2872 }
2873
2874 /*
2875  * write summary
2876  */
2877 void
2878 osummary(void)
2879 {
2880
2881         int i, *p;
2882
2883         if(foutput == 0)
2884                 return;
2885         i = 0;
2886         for(p = maxa; p >= amem; p--)
2887                 if(*p == 0)
2888                         i++;
2889
2890         Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2891                 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2892         Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2893         Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2894 }
2895
2896 /*
2897  * this version is for C
2898  * write out the optimized parser
2899  */
2900 void
2901 aoutput(void)
2902 {
2903         Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2904         arout("yyact", amem, (maxa-amem)+1);
2905         arout("yypact", indgo, nstate);
2906         arout("yypgo", pgo, nnonter+1);
2907 }
2908
2909 void
2910 arout(char *s, int *v, int n)
2911 {
2912         int i;
2913
2914         Bprint(ftable, "short   %s[] =\n{", s);
2915         for(i = 0; i < n;) {
2916                 if(i%10 == 0)
2917                         Bprint(ftable, "\n");
2918                 Bprint(ftable, "%4d", v[i]);
2919                 i++;
2920                 if(i == n)
2921                         Bprint(ftable, "\n};\n");
2922                 else
2923                         Bprint(ftable, ",");
2924         }
2925 }
2926
2927 /*
2928  * read and convert an integer from the standard input
2929  * return the terminating character
2930  * blanks, tabs, and newlines are ignored
2931  */
2932 int
2933 gtnm(void)
2934 {
2935         int sign, val, c;
2936
2937         sign = 0;
2938         val = 0;
2939         while((c=Bgetrune(finput)) != Beof) {
2940                 if(isdigit(c)) {
2941                         val = val*10 + c-'0';
2942                         continue;
2943                 }
2944                 if(c == '-') {
2945                         sign = 1;
2946                         continue;
2947                 }
2948                 break;
2949         }
2950         if(sign)
2951                 val = -val;
2952         *pmem++ = val;
2953         if(pmem >= &mem0[MEMSIZE])
2954                 error("out of space");
2955         return c;
2956 }