]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/yacc.c
rio, kbdfs: increase read buffer for high latency kbdfs support
[plan9front.git] / sys / src / cmd / yacc.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5
6 #define Bungetrune      Bungetc         /* ok for now. */
7
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET         ((32+NTERMS)/32)        /* BOTCH?? +31 */
12 #define BIT(a,i)        ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)     ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)       (((n)+32)/32)
15
16 #define PARSER          "/sys/lib/yaccpar"
17 #define PARSERS         "/sys/lib/yaccpars"
18 #define TEMPNAME        "y.tmp.XXXXXX"
19 #define ACTNAME         "y.acts.XXXXXX"
20 #define OFILE           "tab.c"
21 #define FILEU           "output"
22 #define FILED           "tab.h"
23 #define FILEDEBUG       "debug"
24
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31         ACTSIZE         = 40000,
32         MEMSIZE         = 40000,
33         NSTATES         = 2000,
34         NTERMS          = 511,
35         NPROD           = 1600,
36         NNONTERM        = 600,
37         TEMPSIZE        = 2000,
38         CNAMSZ          = 10000,
39         LSETSIZE        = 2400,
40         WSETSIZE        = 350,
41
42         NAMESIZE        = 50,
43         NTYPES          = 63,
44         ISIZE           = 400,
45
46         PRIVATE         = 0xE000,       /* unicode private use */
47
48         /* relationships which must hold:
49                 TBITSET ints must hold NTERMS+1 bits...
50                 WSETSIZE >= NNONTERM
51                 LSETSIZE >= NNONTERM
52                 TEMPSIZE >= NTERMS + NNONTERM + 1
53                 TEMPSIZE >= NSTATES
54         */
55
56         NTBASE          = 010000,
57         ERRCODE         = 8190,
58         ACCEPTCODE      = 8191,
59
60         NOASC           = 0,    /* no assoc. */
61         LASC            = 1,    /* left assoc. */
62         RASC            = 2,    /* right assoc. */
63         BASC            = 3,    /* binary assoc. */
64
65         /* flags for state generation */
66
67         DONE            = 0,
68         MUSTDO          = 1,
69         MUSTLOOKAHEAD   = 2,
70
71         /* flags for a rule having an action, and being reduced */
72
73         ACTFLAG         = 04,
74         REDFLAG         = 010,
75
76         /* output parser flags */
77         YYFLAG1         = -1000,
78
79         /* parse tokens */
80         IDENTIFIER      = PRIVATE,
81         MARK,
82         TERM,
83         LEFT,
84         RIGHT,
85         BINARY,
86         PREC,
87         LCURLY,
88         IDENTCOLON,
89         NUMBER,
90         START,
91         TYPEDEF,
92         TYPENAME,
93         UNION,
94
95         ENDFILE         = 0,
96
97         EMPTY           = 1,
98         WHOKNOWS        = 0,
99         OK              = 1,
100         NOMORE          = -1000,
101 };
102
103         /* macros for getting associativity and precedence levels */
104
105 #define ASSOC(i)        ((i)&03)
106 #define PLEVEL(i)       (((i)>>4)&077)
107 #define TYPE(i)         (((i)>>10)&077)
108
109         /* macros for setting associativity and precedence levels */
110
111 #define SETASC(i,j)     i |= j
112 #define SETPLEV(i,j)    i |= (j<<4)
113 #define SETTYPE(i,j)    i |= (j<<10)
114
115         /* looping macros */
116
117 #define TLOOP(i)        for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)       for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)      for(i=s; i<nprod; i++)
120 #define SLOOP(i)        for(i=0; i<nstate; i++)
121 #define WSBUMP(x)       x++
122 #define WSLOOP(s,j)     for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)  for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)      for(i=0; i<tbitset; i++)
125
126         /* command to clobber tempfiles after use */
127
128 #define ZAPFILE(bfd, x) {\
129                 if(bfd) Bterm(bfd); \
130                 if(x) remove(x); \
131         }
132
133         /* I/O descriptors */
134
135 Biobuf* faction;        /* file for saving actions */
136 Biobuf* fdefine;        /* file for #defines */
137 Biobuf* fdebug;         /* y.debug for strings for debugging */
138 Biobuf* ftable;         /* y.tab.c file */
139 Biobuf* ftemp;          /* tempfile to pass 2 */
140 Biobuf* finput;         /* input file */
141 Biobuf* foutput;        /* y.output file */
142
143         /* communication variables between various I/O routines */
144
145 char*   infile;                 /* input file name */
146 char*   inpath;                 /* input full path */
147 int     numbval;                /* value of an input number */
148 char    tokname[NAMESIZE+UTFmax+1];     /* input token name, slop for runes and 0 */
149
150         /* structure declarations */
151
152 typedef
153 struct
154 {
155         int     lset[TBITSET];
156 } Lkset;
157
158 typedef
159 struct
160 {
161         int*    pitem;
162         Lkset*  look;
163 } Item;
164
165 typedef
166 struct
167 {
168         char*   name;
169         int     value;
170 } Symb;
171
172 typedef
173 struct
174 {
175         int*    pitem;
176         int     flag;
177         Lkset   ws;
178 } Wset;
179
180         /* storage of names */
181
182 char    cnames[CNAMSZ];         /* place where token and nonterminal names are stored */
183 int     cnamsz = CNAMSZ;        /* size of cnames */
184 char*   cnamp = cnames;         /* place where next name is to be put in */
185 int     ndefout = 4;            /* number of defined symbols output */
186 char*   tempname;
187 char*   actname;
188 char    ttempname[] = TEMPNAME;
189 char    tactname[] = ACTNAME;
190 char*   parser = PARSER;
191 char*   yydebug;
192
193         /* storage of types */
194 int     ntypes;                 /* number of types defined */
195 char*   typeset[NTYPES];        /* pointers to type tags */
196
197         /* token information */
198
199 int     ntokens = 0 ;           /* number of tokens */
200 Symb    tokset[NTERMS];
201 int     toklev[NTERMS];         /* vector with the precedence of the terminals */
202
203         /* nonterminal information */
204
205 int     nnonter = -1;           /* the number of nonterminals */
206 Symb    nontrst[NNONTERM];
207 int     start;                  /* start symbol */
208
209         /* assigned token type values */
210 int     extval = 0;
211
212 char*   ytabc = OFILE;  /* name of y.tab.c */
213
214         /* grammar rule information */
215
216 int     mem0[MEMSIZE] ;         /* production storage */
217 int*    mem = mem0;
218 int     nprod = 1;              /* number of productions */
219 int*    prdptr[NPROD];          /* pointers to descriptions of productions */
220 int     levprd[NPROD];          /* precedence levels for the productions */
221 int     rlines[NPROD];          /* line number for this rule */
222
223         /* state information */
224
225 int     nstate = 0;             /* number of states */
226 Item*   pstate[NSTATES+2];      /* pointers to the descriptions of the states */
227 int     tystate[NSTATES];       /* contains type information about the states */
228 int     defact[NSTATES];        /* the default actions of states */
229 int     tstates[NTERMS];        /* states generated by terminal gotos */
230 int     ntstates[NNONTERM];     /* states generated by nonterminal gotos */
231 int     mstates[NSTATES];       /* chain of overflows of term/nonterm generation lists  */
232 int     lastred;                /* the number of the last reduction of a state */
233
234         /* lookahead set information */
235
236 Lkset   lkst[LSETSIZE];
237 int     nolook;                 /* flag to turn off lookahead computations */
238 int     tbitset;                /* size of lookahead sets */
239 int     nlset = 0;              /* next lookahead set index */
240 int     nolook = 0;             /* flag to suppress lookahead computations */
241 Lkset   clset;                  /* temporary storage for lookahead computations */
242
243         /* working set information */
244
245 Wset    wsets[WSETSIZE];
246 Wset*   cwp;
247
248         /* storage for action table */
249
250 int     amem[ACTSIZE];          /* action table storage */
251 int*    memp = amem;            /* next free action table position */
252 int     indgo[NSTATES];         /* index to the stored goto table */
253
254         /* temporary vector, indexable by states, terms, or ntokens */
255
256 int     temp1[TEMPSIZE];        /* temporary storage, indexed by terms + ntokens or states */
257 int     lineno = 1;             /* current input line number */
258 int     fatfl = 1;              /* if on, error is fatal */
259 int     nerrors = 0;            /* number of errors */
260
261         /* statistics collection variables */
262
263 int     zzgoent;
264 int     zzgobest;
265 int     zzacent;
266 int     zzexcp;
267 int     zzclose;
268 int     zzrrconf;
269 int     zzsrconf;
270
271 int*    ggreed = lkst[0].lset;
272 int*    pgo = wsets[0].ws.lset;
273 int*    yypgo = &nontrst[0].value;
274
275 int     maxspr = 0;             /* maximum spread of any entry */
276 int     maxoff = 0;             /* maximum offset into a array */
277 int*    pmem = mem0;
278 int*    maxa;
279 int     nxdb = 0;
280 int     adb = 0;
281
282
283         /* storage for information about the nonterminals */
284
285 int**   pres[NNONTERM+2];       /* vector of pointers to productions yielding each nonterminal */
286 Lkset*  pfirst[NNONTERM+2];     /* vector of pointers to first sets for each nonterminal */
287 int     pempty[NNONTERM+1];     /* vector of nonterminals nontrivially deriving e */
288
289         /* random stuff picked out from between functions */
290
291 int     indebug = 0;
292 Wset*   zzcwp = wsets;
293 int     zzgoent = 0;
294 int     zzgobest = 0;
295 int     zzacent = 0;
296 int     zzexcp = 0;
297 int     zzclose = 0;
298 int     zzsrconf = 0;
299 int*    zzmemsz = mem0;
300 int     zzrrconf = 0;
301 int     pidebug = 0;            /* debugging flag for putitem */
302 int     gsdebug = 0;
303 int     cldebug = 0;            /* debugging flag for closure */
304 int     pkdebug = 0;
305 int     g2debug = 0;
306
307 struct
308 {
309         char*   name;
310         long    value;
311 } resrv[] =
312 {
313         "binary",       BINARY,
314         "left",         LEFT,
315         "nonassoc",     BINARY,
316         "prec",         PREC,
317         "right",        RIGHT,
318         "start",        START,
319         "term",         TERM,
320         "token",        TERM,
321         "type",         TYPEDEF,
322         "union",        UNION,
323         0,
324 };
325
326         /* define functions */
327
328 void    main(int, char**);
329 void    others(void);
330 char*   chcopy(char*, char*);
331 char*   writem(int*);
332 char*   symnam(int);
333 void    summary(void);
334 void    error(char*, ...);
335 void    aryfil(int*, int, int);
336 int     setunion(int*, int*);
337 void    prlook(Lkset*);
338 void    cpres(void);
339 void    cpfir(void);
340 int     state(int);
341 void    putitem(int*, Lkset*);
342 void    cempty(void);
343 void    stagen(void);
344 void    closure(int);
345 Lkset*  flset(Lkset*);
346 void    cleantmp(void);
347 void    intr(void);
348 void    setup(int, char**);
349 void    finact(void);
350 int     defin(int, char*);
351 void    defout(int);
352 char*   cstash(char*);
353 long    gettok(void);
354 int     fdtype(int);
355 int     chfind(int, char*);
356 void    cpyunion(void);
357 void    cpycode(void);
358 int     skipcom(void);
359 void    cpyact(int);
360 void    openup(char*, int, int, int, char*);
361 void    output(void);
362 int     apack(int*, int);
363 void    go2out(void);
364 void    go2gen(int);
365 void    precftn(int, int, int);
366 void    wract(int);
367 void    wrstate(int);
368 void    warray(char*, int*, int);
369 void    hideprod(void);
370 void    callopt(void);
371 void    gin(int);
372 void    stin(int);
373 int     nxti(void);
374 void    osummary(void);
375 void    aoutput(void);
376 void    arout(char*, int*, int);
377 int     gtnm(void);
378
379 void
380 main(int argc, char *argv[])
381 {
382
383         setup(argc, argv);      /* initialize and read productions */
384         tbitset = NWORDS(ntokens);
385         cpres();                /* make table of which productions yield a given nonterminal */
386         cempty();               /* make a table of which nonterminals can match the empty string */
387         cpfir();                /* make a table of firsts of nonterminals */
388         stagen();               /* generate the states */
389         output();               /* write the states and the tables */
390         go2out();
391         hideprod();
392         summary();
393         callopt();
394         others();
395         exits(0);
396 }
397
398 /*
399  * put out other arrays, copy the parsers
400  */
401 void
402 others(void)
403 {
404         int c, i, j;
405
406         finput = Bopen(parser, OREAD);
407         if(finput == 0)
408                 error("cannot find parser %s", parser);
409         Blethal(finput, nil);
410         warray("yyr1", levprd, nprod);
411         aryfil(temp1, nprod, 0);
412         PLOOP(1, i)
413                 temp1[i] = prdptr[i+1]-prdptr[i]-2;
414         warray("yyr2", temp1, nprod);
415
416         aryfil(temp1, nstate, -1000);
417         TLOOP(i)
418                 for(j=tstates[i]; j!=0; j=mstates[j])
419                         temp1[j] = i;
420         NTLOOP(i)
421                 for(j=ntstates[i]; j!=0; j=mstates[j])
422                         temp1[j] = -i;
423         warray("yychk", temp1, nstate);
424         warray("yydef", defact, nstate);
425
426         /* put out token translation tables */
427         /* table 1 has 0-256 */
428         aryfil(temp1, 256, 0);
429         c = 0;
430         TLOOP(i) {
431                 j = tokset[i].value;
432                 if(j >= 0 && j < 256) {
433                         if(temp1[j]) {
434                                 print("yacc bug -- cant have 2 different Ts with same value\n");
435                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
436                                 nerrors++;
437                         }
438                         temp1[j] = i;
439                         if(j > c)
440                                 c = j;
441                 }
442         }
443         warray("yytok1", temp1, c+1);
444
445         /* table 2 has PRIVATE-PRIVATE+256 */
446         aryfil(temp1, 256, 0);
447         c = 0;
448         TLOOP(i) {
449                 j = tokset[i].value - PRIVATE;
450                 if(j >= 0 && j < 256) {
451                         if(temp1[j]) {
452                                 print("yacc bug -- cant have 2 different Ts with same value\n");
453                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
454                                 nerrors++;
455                         }
456                         temp1[j] = i;
457                         if(j > c)
458                                 c = j;
459                 }
460         }
461         warray("yytok2", temp1, c+1);
462
463         /* table 3 has everything else */
464         Bprint(ftable, "long    yytok3[] =\n{\n");
465         c = 0;
466         TLOOP(i) {
467                 j = tokset[i].value;
468                 if(j >= 0 && j < 256)
469                         continue;
470                 if(j >= PRIVATE && j < 256+PRIVATE)
471                         continue;
472
473                 Bprint(ftable, "%4d,%4d,", j, i);
474                 c++;
475                 if(c%5 == 0)
476                         Bprint(ftable, "\n");
477         }
478         Bprint(ftable, "%4d\n};\n", 0);
479
480         /* copy parser text */
481         Bprint(ftable, "\n#line\t1\t\"%s\"\n", parser);
482         while((c=Bgetrune(finput)) != Beof) {
483                 if(c == '$') {
484                         if((c = Bgetrune(finput)) != 'A')
485                                 Bputrune(ftable, '$');
486                         else { /* copy actions */
487                                 faction = Bopen(actname, OREAD);
488                                 if(faction == 0)
489                                         error("cannot reopen action tempfile");
490                                 Blethal(faction, nil);
491                                 while((c=Bgetrune(faction)) != Beof)
492                                         Bputrune(ftable, c);
493                                 ZAPFILE(faction, actname);
494                                 c = Bgetrune(finput);
495                         }
496                 }
497                 Bputrune(ftable, c);
498         }
499         Bterm(ftable);
500 }
501
502 /*
503  * copies string q into p, returning next free char ptr
504  */
505 char*
506 chcopy(char* p, char* q)
507 {
508         int c;
509
510         while(c = *q) {
511                 if(c == '"')
512                         *p++ = '\\';
513                 *p++ = c;
514                 q++;
515         }
516         *p = 0;
517         return p;
518 }
519
520 /*
521  * creates output string for item pointed to by pp
522  */
523 char*
524 writem(int *pp)
525 {
526         int i,*p;
527         static char sarr[ISIZE];
528         char* q;
529
530         for(p=pp; *p>0; p++)
531                 ;
532         p = prdptr[-*p];
533         q = chcopy(sarr, nontrst[*p-NTBASE].name);
534         q = chcopy(q, ": ");
535         for(;;) {
536                 *q = ' ';
537                 p++;
538                 if(p == pp)
539                         *q = '.';
540                 q++;
541                 *q = '\0';
542                 i = *p;
543                 if(i <= 0)
544                         break;
545                 q = chcopy(q, symnam(i));
546                 if(q > &sarr[ISIZE-30])
547                         error("item too big");
548         }
549
550         /* an item calling for a reduction */
551         i = *pp;
552         if(i < 0 ) {
553                 q = chcopy(q, "    (");
554                 sprint(q, "%d)", -i);
555         }
556         return sarr;
557 }
558
559 /*
560  * return a pointer to the name of symbol i
561  */
562 char*
563 symnam(int i)
564 {
565         char* cp;
566
567         cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
568         if(*cp == ' ')
569                 cp++;
570         return cp;
571 }
572
573 /*
574  * output the summary on y.output
575  */
576 void
577 summary(void)
578 {
579
580         if(foutput != 0) {
581                 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
582                         ntokens, NTERMS, nnonter, NNONTERM);
583                 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
584                         nprod, NPROD, nstate, NSTATES);
585                 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
586                         zzsrconf, zzrrconf);
587                 Bprint(foutput, "%d/%d working sets used\n",
588                         (int)(zzcwp-wsets), WSETSIZE);
589                 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
590                         (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
591                 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
592                 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
593                 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
594                 Bprint(foutput, "%d goto entries\n", zzgoent);
595                 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
596         }
597         if(zzsrconf != 0 || zzrrconf != 0) {
598                 print("\nconflicts: ");
599                 if(zzsrconf)
600                         print("%d shift/reduce", zzsrconf);
601                 if(zzsrconf && zzrrconf)
602                         print(", ");
603                 if(zzrrconf)
604                         print("%d reduce/reduce", zzrrconf);
605                 print("\n");
606         }
607         if(ftemp != 0) {
608                 Bterm(ftemp);
609                 ftemp = 0;
610         }
611         if(fdefine != 0) {
612                 Bterm(fdefine);
613                 fdefine = 0;
614         }
615 }
616
617 /*
618  * write out error comment -- NEEDS WORK
619  */
620 void
621 error(char *s, ...)
622 {
623         va_list ap;
624
625         nerrors++;
626         va_start(ap, s);
627         fprint(2, "%s:%d: fatal error: ", infile, lineno);
628         vfprint(2, s, ap);
629         fprint(2, "\n");
630         va_end(ap);
631         if(!fatfl)
632                 return;
633         summary();
634         cleantmp();
635         exits("error");
636 }
637
638 /*
639  * set elements 0 through n-1 to c
640  */
641 void
642 aryfil(int *v, int n, int c)
643 {
644         int i;
645
646         for(i=0; i<n; i++)
647                 v[i] = c;
648 }
649
650 /*
651  * set a to the union of a and b
652  * return 1 if b is not a subset of a, 0 otherwise
653  */
654 int
655 setunion(int *a, int *b)
656 {
657         int i, x, sub;
658
659         sub = 0;
660         SETLOOP(i) {
661                 x = *a;
662                 *a |= *b;
663                 if(*a != x)
664                         sub = 1;
665                 a++;
666                 b++;
667         }
668         return sub;
669 }
670
671 void
672 prlook(Lkset* p)
673 {
674         int j, *pp;
675
676         pp = p->lset;
677         if(pp == 0)
678                 Bprint(foutput, "\tNULL");
679         else {
680                 Bprint(foutput, " { ");
681                 TLOOP(j)
682                         if(BIT(pp,j))
683                                 Bprint(foutput, "%s ", symnam(j));
684                 Bprint(foutput, "}");
685         }
686 }
687
688 /*
689  * compute an array with the beginnings of  productions yielding given nonterminals
690  * The array pres points to these lists
691  * the array pyield has the lists: the total size is only NPROD+1
692  */
693 void
694 cpres(void)
695 {
696         int c, j, i, **pmem;
697         static int *pyield[NPROD];
698
699         pmem = pyield;
700         NTLOOP(i) {
701                 c = i+NTBASE;
702                 pres[i] = pmem;
703                 fatfl = 0;      /* make undefined  symbols  nonfatal */
704                 PLOOP(0, j)
705                         if(*prdptr[j] == c)
706                                 *pmem++ =  prdptr[j]+1;
707                 if(pres[i] == pmem)
708                         error("nonterminal %s not defined!", nontrst[i].name);
709         }
710         pres[i] = pmem;
711         fatfl = 1;
712         if(nerrors) {
713                 summary();
714                 cleantmp();
715                 exits("error");
716         }
717         if(pmem != &pyield[nprod])
718                 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
719 }
720
721 /*
722  * compute an array with the first of nonterminals
723  */
724 void
725 cpfir(void)
726 {
727         int *p, **s, i, **t, ch, changes;
728
729         zzcwp = &wsets[nnonter];
730         NTLOOP(i) {
731                 aryfil(wsets[i].ws.lset, tbitset, 0);
732                 t = pres[i+1];
733                 /* initially fill the sets */
734                 for(s=pres[i]; s<t; ++s)
735                         for(p = *s; (ch = *p) > 0; ++p) {
736                                 if(ch < NTBASE) {
737                                         SETBIT(wsets[i].ws.lset, ch);
738                                         break;
739                                 }
740                                 if(!pempty[ch-NTBASE])
741                                         break;
742                         }
743         }
744
745         /* now, reflect transitivity */
746         changes = 1;
747         while(changes) {
748                 changes = 0;
749                 NTLOOP(i) {
750                         t = pres[i+1];
751                         for(s = pres[i]; s < t; ++s)
752                                 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
753                                         changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
754                                         if(!pempty[ch])
755                                                 break;
756                                 }
757                 }
758         }
759
760         NTLOOP(i)
761                 pfirst[i] = flset(&wsets[i].ws);
762         if(!indebug)
763                 return;
764         if(foutput != 0)
765                 NTLOOP(i) {
766                         Bprint(foutput, "\n%s: ", nontrst[i].name);
767                         prlook(pfirst[i]);
768                         Bprint(foutput, " %d\n", pempty[i]);
769                 }
770 }
771
772 /*
773  * sorts last state,and sees if it equals earlier ones. returns state number
774  */
775 int
776 state(int c)
777 {
778         Item *p1, *p2, *k, *l, *q1, *q2;
779         int size1, size2, i;
780
781         p1 = pstate[nstate];
782         p2 = pstate[nstate+1];
783         if(p1 == p2)
784                 return 0;       /* null state */
785         /* sort the items */
786         for(k = p2-1; k > p1; k--)      /* make k the biggest */
787                 for(l = k-1; l >= p1; --l)
788                         if(l->pitem > k->pitem) {
789                                 int *s;
790                                 Lkset *ss;
791
792                                 s = k->pitem;
793                                 k->pitem = l->pitem;
794                                 l->pitem = s;
795                                 ss = k->look;
796                                 k->look = l->look;
797                                 l->look = ss;
798                         }
799         size1 = p2 - p1;        /* size of state */
800
801         for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
802                 /* get ith state */
803                 q1 = pstate[i];
804                 q2 = pstate[i+1];
805                 size2 = q2 - q1;
806                 if(size1 != size2)
807                         continue;
808                 k = p1;
809                 for(l = q1; l < q2; l++) {
810                         if(l->pitem != k->pitem)
811                                 break;
812                         k++;
813                 }
814                 if(l != q2)
815                         continue;
816                 /* found it */
817                 pstate[nstate+1] = pstate[nstate];      /* delete last state */
818                 /* fix up lookaheads */
819                 if(nolook)
820                         return i;
821                 for(l = q1, k = p1; l < q2; ++l, ++k ) {
822                         int s;
823
824                         SETLOOP(s)
825                                 clset.lset[s] = l->look->lset[s];
826                         if(setunion(clset.lset, k->look->lset)) {
827                                 tystate[i] = MUSTDO;
828                                 /* register the new set */
829                                 l->look = flset( &clset );
830                         }
831                 }
832                 return i;
833         }
834         /* state is new */
835         if(nolook)
836                 error("yacc state/nolook error");
837         pstate[nstate+2] = p2;
838         if(nstate+1 >= NSTATES)
839                 error("too many states");
840         if(c >= NTBASE) {
841                 mstates[nstate] = ntstates[c-NTBASE];
842                 ntstates[c-NTBASE] = nstate;
843         } else {
844                 mstates[nstate] = tstates[c];
845                 tstates[c] = nstate;
846         }
847         tystate[nstate] = MUSTDO;
848         return nstate++;
849 }
850
851 void
852 putitem(int *ptr, Lkset *lptr)
853 {
854         Item *j;
855
856         if(pidebug && foutput != 0)
857                 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
858         j = pstate[nstate+1];
859         j->pitem = ptr;
860         if(!nolook)
861                 j->look = flset(lptr);
862         pstate[nstate+1] = ++j;
863         if((int*)j > zzmemsz) {
864                 zzmemsz = (int*)j;
865                 if(zzmemsz >=  &mem0[MEMSIZE])
866                         error("out of state space");
867         }
868 }
869
870 /*
871  * mark nonterminals which derive the empty string
872  * also, look for nonterminals which don't derive any token strings
873  */
874 void
875 cempty(void)
876 {
877
878         int i, *p;
879
880         /* first, use the array pempty to detect productions that can never be reduced */
881         /* set pempty to WHONOWS */
882         aryfil(pempty, nnonter+1, WHOKNOWS);
883
884         /* now, look at productions, marking nonterminals which derive something */
885 more:
886         PLOOP(0, i) {
887                 if(pempty[*prdptr[i] - NTBASE])
888                         continue;
889                 for(p = prdptr[i]+1; *p >= 0; ++p)
890                         if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
891                                 break;
892                 /* production can be derived */
893                 if(*p < 0) {
894                         pempty[*prdptr[i]-NTBASE] = OK;
895                         goto more;
896                 }
897         }
898
899         /* now, look at the nonterminals, to see if they are all OK */
900         NTLOOP(i) {
901                 /* the added production rises or falls as the start symbol ... */
902                 if(i == 0)
903                         continue;
904                 if(pempty[i] != OK) {
905                         fatfl = 0;
906                         error("nonterminal %s never derives any token string", nontrst[i].name);
907                 }
908         }
909
910         if(nerrors) {
911                 summary();
912                 cleantmp();
913                 exits("error");
914         }
915
916         /* now, compute the pempty array, to see which nonterminals derive the empty string */
917         /* set pempty to WHOKNOWS */
918         aryfil( pempty, nnonter+1, WHOKNOWS);
919
920         /* loop as long as we keep finding empty nonterminals */
921
922 again:
923         PLOOP(1, i) {
924                 /* not known to be empty */
925                 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
926                         for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
927                                 ;
928                         /* we have a nontrivially empty nonterminal */
929                         if(*p < 0) {
930                                 pempty[*prdptr[i]-NTBASE] = EMPTY;
931                                 /* got one ... try for another */
932                                 goto again;
933                         }
934                 }
935         }
936 }
937
938 /*
939  * generate the states
940  */
941 void
942 stagen(void)
943 {
944
945         int c, i, j, more;
946         Wset *p, *q;
947
948         /* initialize */
949         nstate = 0;
950
951         /* THIS IS FUNNY from the standpoint of portability
952          * it represents the magic moment when the mem0 array, which has
953          * been holding the productions, starts to hold item pointers, of a
954          * different type...
955          * someday, alloc should be used to allocate all this stuff... for now, we
956          * accept that if pointers don't fit in integers, there is a problem...
957          */
958
959         pstate[0] = pstate[1] = (Item*)mem;
960         aryfil(clset.lset, tbitset, 0);
961         putitem(prdptr[0]+1, &clset);
962         tystate[0] = MUSTDO;
963         nstate = 1;
964         pstate[2] = pstate[1];
965
966         aryfil(amem, ACTSIZE, 0);
967
968         /* now, the main state generation loop */
969         for(more=1; more;) {
970                 more = 0;
971                 SLOOP(i) {
972                         if(tystate[i] != MUSTDO)
973                                 continue;
974                         tystate[i] = DONE;
975                         aryfil(temp1, nnonter+1, 0);
976                         /* take state i, close it, and do gotos */
977                         closure(i);
978                         /* generate goto's */
979                         WSLOOP(wsets, p) {
980                                 if(p->flag)
981                                         continue;
982                                 p->flag = 1;
983                                 c = *(p->pitem);
984                                 if(c <= 1) {
985                                         if(pstate[i+1]-pstate[i] <= p-wsets)
986                                                 tystate[i] = MUSTLOOKAHEAD;
987                                         continue;
988                                 }
989                                 /* do a goto on c */
990                                 WSLOOP(p, q)
991                                         /* this item contributes to the goto */
992                                         if(c == *(q->pitem)) {
993                                                 putitem(q->pitem+1, &q->ws);
994                                                 q->flag = 1;
995                                         }
996                                 if(c < NTBASE)
997                                         state(c);       /* register new state */
998                                 else
999                                         temp1[c-NTBASE] = state(c);
1000                         }
1001                         if(gsdebug && foutput != 0) {
1002                                 Bprint(foutput, "%d: ", i);
1003                                 NTLOOP(j)
1004                                         if(temp1[j])
1005                                                 Bprint(foutput, "%s %d, ",
1006                                                 nontrst[j].name, temp1[j]);
1007                                 Bprint(foutput, "\n");
1008                         }
1009                         indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1010                         /* do some more */
1011                         more = 1;
1012                 }
1013         }
1014 }
1015
1016 /*
1017  * generate the closure of state i
1018  */
1019 void
1020 closure(int i)
1021 {
1022
1023         Wset *u, *v;
1024         Item *p, *q;
1025         int c, ch, work, k, *pi, **s, **t;
1026
1027         zzclose++;
1028
1029         /* first, copy kernel of state i to wsets */
1030         cwp = wsets;
1031         ITMLOOP(i, p, q) {
1032                 cwp->pitem = p->pitem;
1033                 cwp->flag = 1;                  /* this item must get closed */
1034                 SETLOOP(k)
1035                         cwp->ws.lset[k] = p->look->lset[k];
1036                 WSBUMP(cwp);
1037         }
1038
1039         /* now, go through the loop, closing each item */
1040         work = 1;
1041         while(work) {
1042                 work = 0;
1043                 WSLOOP(wsets, u) {
1044                         if(u->flag == 0)
1045                                 continue;
1046                         /* dot is before c */
1047                         c = *(u->pitem);
1048                         if(c < NTBASE) {
1049                                 u->flag = 0;
1050                                 /* only interesting case is where . is before nonterminal */
1051                                 continue;
1052                         }
1053
1054                         /* compute the lookahead */
1055                         aryfil(clset.lset, tbitset, 0);
1056
1057                         /* find items involving c */
1058                         WSLOOP(u, v)
1059                                 if(v->flag == 1 && *(pi=v->pitem) == c) {
1060                                         v->flag = 0;
1061                                         if(nolook)
1062                                                 continue;
1063                                         while((ch = *++pi) > 0) {
1064                                                 /* terminal symbol */
1065                                                 if(ch < NTBASE) {
1066                                                         SETBIT(clset.lset, ch);
1067                                                         break;
1068                                                 }
1069                                                 /* nonterminal symbol */
1070                                                 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1071                                                 if(!pempty[ch-NTBASE])
1072                                                         break;
1073                                         }
1074                                         if(ch <= 0)
1075                                                 setunion(clset.lset, v->ws.lset);
1076                                 }
1077
1078                         /*
1079                          * now loop over productions derived from c
1080                          * c is now nonterminal number
1081                          */
1082                         c -= NTBASE;
1083                         t = pres[c+1];
1084                         for(s = pres[c]; s < t; ++s) {
1085                                 /*
1086                                  * put these items into the closure
1087                                  * is the item there
1088                                  */
1089                                 WSLOOP(wsets, v)
1090                                         /* yes, it is there */
1091                                         if(v->pitem == *s) {
1092                                                 if(nolook)
1093                                                         goto nexts;
1094                                                 if(setunion(v->ws.lset, clset.lset))
1095                                                         v->flag = work = 1;
1096                                                 goto nexts;
1097                                         }
1098
1099                                 /*  not there; make a new entry */
1100                                 if(cwp-wsets+1 >= WSETSIZE)
1101                                         error( "working set overflow");
1102                                 cwp->pitem = *s;
1103                                 cwp->flag = 1;
1104                                 if(!nolook) {
1105                                         work = 1;
1106                                         SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1107                                 }
1108                                 WSBUMP(cwp);
1109
1110                         nexts:;
1111                         }
1112                 }
1113         }
1114
1115         /* have computed closure; flags are reset; return */
1116         if(cwp > zzcwp)
1117                 zzcwp = cwp;
1118         if(cldebug && foutput != 0) {
1119                 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1120                 WSLOOP(wsets, u) {
1121                         if(u->flag)
1122                                 Bprint(foutput, "flag set!\n");
1123                         u->flag = 0;
1124                         Bprint(foutput, "\t%s", writem(u->pitem));
1125                         prlook(&u->ws);
1126                         Bprint(foutput, "\n");
1127                 }
1128         }
1129 }
1130
1131 /*
1132  * decide if the lookahead set pointed to by p is known
1133  * return pointer to a perminent location for the set
1134  */
1135 Lkset*
1136 flset(Lkset *p)
1137 {
1138         Lkset *q;
1139         int *u, *v, *w, j;
1140
1141         for(q = &lkst[nlset]; q-- > lkst;) {
1142                 u = p->lset;
1143                 v = q->lset;
1144                 w = &v[tbitset];
1145                 while(v < w)
1146                         if(*u++ != *v++)
1147                                 goto more;
1148                 /* we have matched */
1149                 return q;
1150         more:;
1151         }
1152         /* add a new one */
1153         q = &lkst[nlset++];
1154         if(nlset >= LSETSIZE)
1155                 error("too many lookahead sets");
1156         SETLOOP(j)
1157                 q->lset[j] = p->lset[j];
1158         return q;
1159 }
1160
1161 void
1162 cleantmp(void)
1163 {
1164         ZAPFILE(faction, actname);
1165         ZAPFILE(ftemp, tempname);
1166 }
1167
1168 void
1169 intr(void)
1170 {
1171         cleantmp();
1172         exits("interrupted");
1173 }
1174
1175 void
1176 usage(void)
1177 {
1178         fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1179         exits("usage");
1180 }
1181
1182 void
1183 setup(int argc, char *argv[])
1184 {
1185         long c, t;
1186         int i, j, lev, ty, ytab, *p;
1187         int vflag, dflag, stem;
1188         char actnm[8], *stemc, *s, dirbuf[128];
1189
1190         ytab = 0;
1191         vflag = 0;
1192         dflag = 0;
1193         stem = 0;
1194         stemc = "y";
1195         foutput = 0;
1196         fdefine = 0;
1197         fdebug = 0;
1198         ARGBEGIN{
1199         case 'v':
1200         case 'V':
1201                 vflag++;
1202                 break;
1203         case 'D':
1204                 yydebug = EARGF(usage());
1205                 break;
1206         case 'd':
1207                 dflag++;
1208                 break;
1209         case 'o':
1210                 ytab++;
1211                 ytabc = EARGF(usage());
1212                 break;
1213         case 's':
1214                 stem++;
1215                 stemc = ARGF();
1216                 break;
1217         case 'S':
1218                 parser = PARSERS;
1219                 break;
1220         default:
1221                 error("illegal option: %c", ARGC());
1222         }ARGEND
1223         openup(stemc, dflag, vflag, ytab, ytabc);
1224
1225         ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1226         faction = Bopen(actname = mktemp(tactname), OWRITE);
1227         if(ftemp == 0 || faction == 0)
1228                 error("cannot open temp file");
1229         Blethal(ftemp, nil);
1230         Blethal(faction, nil);
1231         if(argc < 1)
1232                 error("no input file");
1233         infile = argv[0];
1234         if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1235                 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1236                 s = malloc(i);
1237                 if(s != nil){
1238                         snprint(s, i, "%s/%s", dirbuf, infile);
1239                         cleanname(s);
1240                         inpath = s;
1241                 }
1242         }
1243         finput = Bopen(inpath, OREAD);
1244         if(finput == 0)
1245                 error("cannot open '%s'", argv[0]);
1246         Blethal(finput, nil);
1247         cnamp = cnames;
1248
1249         defin(0, "$end");
1250         extval = PRIVATE;       /* tokens start in unicode 'private use' */
1251         defin(0, "error");
1252         defin(1, "$accept");
1253         defin(0, "$unk");
1254         mem = mem0;
1255         i = 0;
1256
1257         for(t = gettok(); t != MARK && t != ENDFILE;)
1258         switch(t) {
1259         case ';':
1260                 t = gettok();
1261                 break;
1262
1263         case START:
1264                 if(gettok() != IDENTIFIER)
1265                         error("bad %%start construction");
1266                 start = chfind(1, tokname);
1267                 t = gettok();
1268                 continue;
1269
1270         case TYPEDEF:
1271                 if(gettok() != TYPENAME)
1272                         error("bad syntax in %%type");
1273                 ty = numbval;
1274                 for(;;) {
1275                         t = gettok();
1276                         switch(t) {
1277                         case IDENTIFIER:
1278                                 if((t=chfind(1, tokname)) < NTBASE) {
1279                                         j = TYPE(toklev[t]);
1280                                         if(j != 0 && j != ty)
1281                                                 error("type redeclaration of token %s",
1282                                                         tokset[t].name);
1283                                         else
1284                                                 SETTYPE(toklev[t], ty);
1285                                 } else {
1286                                         j = nontrst[t-NTBASE].value;
1287                                         if(j != 0 && j != ty)
1288                                                 error("type redeclaration of nonterminal %s",
1289                                                         nontrst[t-NTBASE].name );
1290                                         else
1291                                                 nontrst[t-NTBASE].value = ty;
1292                                 }
1293                         case ',':
1294                                 continue;
1295                         case ';':
1296                                 t = gettok();
1297                         default:
1298                                 break;
1299                         }
1300                         break;
1301                 }
1302                 continue;
1303
1304         case UNION:
1305                 /* copy the union declaration to the output */
1306                 cpyunion();
1307                 t = gettok();
1308                 continue;
1309
1310         case LEFT:
1311         case BINARY:
1312         case RIGHT:
1313                 i++;
1314
1315         case TERM:
1316                 /* nonzero means new prec. and assoc. */
1317                 lev = t-TERM;
1318                 ty = 0;
1319
1320                 /* get identifiers so defined */
1321                 t = gettok();
1322
1323                 /* there is a type defined */
1324                 if(t == TYPENAME) {
1325                         ty = numbval;
1326                         t = gettok();
1327                 }
1328                 for(;;) {
1329                         switch(t) {
1330                         case ',':
1331                                 t = gettok();
1332                                 continue;
1333
1334                         case ';':
1335                                 break;
1336
1337                         case IDENTIFIER:
1338                                 j = chfind(0, tokname);
1339                                 if(j >= NTBASE)
1340                                         error("%s defined earlier as nonterminal", tokname);
1341                                 if(lev) {
1342                                         if(ASSOC(toklev[j]))
1343                                                 error("redeclaration of precedence of %s", tokname);
1344                                         SETASC(toklev[j], lev);
1345                                         SETPLEV(toklev[j], i);
1346                                 }
1347                                 if(ty) {
1348                                         if(TYPE(toklev[j]))
1349                                                 error("redeclaration of type of %s", tokname);
1350                                         SETTYPE(toklev[j],ty);
1351                                 }
1352                                 t = gettok();
1353                                 if(t == NUMBER) {
1354                                         tokset[j].value = numbval;
1355                                         if(j < ndefout && j > 3)
1356                                                 error("please define type number of %s earlier",
1357                                                         tokset[j].name);
1358                                         t = gettok();
1359                                 }
1360                                 continue;
1361                         }
1362                         break;
1363                 }
1364                 continue;
1365
1366         case LCURLY:
1367                 defout(0);
1368                 cpycode();
1369                 t = gettok();
1370                 continue;
1371
1372         default:
1373                 error("syntax error");
1374         }
1375         if(t == ENDFILE)
1376                 error("unexpected EOF before %%");
1377
1378         /* t is MARK */
1379         Bprint(ftable, "extern  int     yyerrflag;\n");
1380         Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1381         Bprint(ftable, "#define YYMAXDEPTH      150\n");
1382         Bprint(ftable, "#endif\n" );
1383         if(!ntypes) {
1384                 Bprint(ftable, "#ifndef YYSTYPE\n");
1385                 Bprint(ftable, "#define YYSTYPE int\n");
1386                 Bprint(ftable, "#endif\n");
1387         }
1388         Bprint(ftable, "YYSTYPE yylval;\n");
1389         Bprint(ftable, "YYSTYPE yyval;\n");
1390
1391         prdptr[0] = mem;
1392
1393         /* added production */
1394         *mem++ = NTBASE;
1395
1396         /* if start is 0, we will overwrite with the lhs of the first rule */
1397         *mem++ = start;
1398         *mem++ = 1;
1399         *mem++ = 0;
1400         prdptr[1] = mem;
1401         while((t=gettok()) == LCURLY)
1402                 cpycode();
1403         if(t != IDENTCOLON)
1404                 error("bad syntax on first rule");
1405
1406         if(!start)
1407                 prdptr[0][1] = chfind(1, tokname);
1408
1409         /* read rules */
1410         while(t != MARK && t != ENDFILE) {
1411                 /* process a rule */
1412                 rlines[nprod] = lineno;
1413                 if(t == '|')
1414                         *mem++ = *prdptr[nprod-1];
1415                 else
1416                         if(t == IDENTCOLON) {
1417                                 *mem = chfind(1, tokname);
1418                                 if(*mem < NTBASE)
1419                                         error("token illegal on LHS of grammar rule");
1420                                 mem++;
1421                         } else
1422                                 error("illegal rule: missing semicolon or | ?");
1423                 /* read rule body */
1424                 t = gettok();
1425
1426         more_rule:
1427                 while(t == IDENTIFIER) {
1428                         *mem = chfind(1, tokname);
1429                         if(*mem < NTBASE)
1430                                 levprd[nprod] = toklev[*mem];
1431                         mem++;
1432                         t = gettok();
1433                 }
1434                 if(t == PREC) {
1435                         if(gettok() != IDENTIFIER)
1436                                 error("illegal %%prec syntax");
1437                         j = chfind(2, tokname);
1438                         if(j >= NTBASE)
1439                                 error("nonterminal %s illegal after %%prec",
1440                                         nontrst[j-NTBASE].name);
1441                         levprd[nprod] = toklev[j];
1442                         t = gettok();
1443                 }
1444                 if(t == '=') {
1445                         levprd[nprod] |= ACTFLAG;
1446                         Bprint(faction, "\ncase %d:", nprod);
1447                         cpyact(mem-prdptr[nprod]-1);
1448                         Bprint(faction, " break;");
1449                         if((t=gettok()) == IDENTIFIER) {
1450
1451                                 /* action within rule... */
1452                                 sprint(actnm, "$$%d", nprod);
1453
1454                                 /* make it a nonterminal */
1455                                 j = chfind(1, actnm);
1456
1457                                 /*
1458                                  * the current rule will become rule number nprod+1
1459                                  * move the contents down, and make room for the null
1460                                  */
1461                                 for(p = mem; p >= prdptr[nprod]; --p)
1462                                         p[2] = *p;
1463                                 mem += 2;
1464
1465                                 /* enter null production for action */
1466                                 p = prdptr[nprod];
1467                                 *p++ = j;
1468                                 *p++ = -nprod;
1469
1470                                 /* update the production information */
1471                                 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1472                                 levprd[nprod] = ACTFLAG;
1473                                 if(++nprod >= NPROD)
1474                                         error("more than %d rules", NPROD);
1475                                 prdptr[nprod] = p;
1476
1477                                 /* make the action appear in the original rule */
1478                                 *mem++ = j;
1479
1480                                 /* get some more of the rule */
1481                                 goto more_rule;
1482                         }
1483                 }
1484
1485                 while(t == ';')
1486                         t = gettok();
1487                 *mem++ = -nprod;
1488
1489                 /* check that default action is reasonable */
1490                 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1491
1492                         /* no explicit action, LHS has value */
1493                         int tempty;
1494
1495                         tempty = prdptr[nprod][1];
1496                         if(tempty < 0)
1497                                 error("must return a value, since LHS has a type");
1498                         else
1499                                 if(tempty >= NTBASE)
1500                                         tempty = nontrst[tempty-NTBASE].value;
1501                                 else
1502                                         tempty = TYPE(toklev[tempty]);
1503                         if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1504                                 error("default action causes potential type clash");
1505                 }
1506                 nprod++;
1507                 if(nprod >= NPROD)
1508                         error("more than %d rules", NPROD);
1509                 prdptr[nprod] = mem;
1510                 levprd[nprod] = 0;
1511         }
1512
1513         /* end of all rules */
1514         defout(1);
1515
1516         finact();
1517         if(t == MARK) {
1518                 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1519                 while((c=Bgetrune(finput)) != Beof)
1520                         Bputrune(ftable, c);
1521         }
1522         Bterm(finput);
1523 }
1524
1525 /*
1526  * finish action routine
1527  */
1528 void
1529 finact(void)
1530 {
1531
1532         Bterm(faction);
1533         Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1534         Bprint(ftable, "#define YYERRCODE %d\n", 2);
1535 }
1536
1537 /*
1538  * define s to be a terminal if t=0
1539  * or a nonterminal if t=1
1540  */
1541 int
1542 defin(int nt, char *s)
1543 {
1544         int val;
1545         Rune rune;
1546
1547         val = 0;
1548         if(nt) {
1549                 nnonter++;
1550                 if(nnonter >= NNONTERM)
1551                         error("too many nonterminals, limit %d",NNONTERM);
1552                 nontrst[nnonter].name = cstash(s);
1553                 return NTBASE + nnonter;
1554         }
1555
1556         /* must be a token */
1557         ntokens++;
1558         if(ntokens >= NTERMS)
1559                 error("too many terminals, limit %d", NTERMS);
1560         tokset[ntokens].name = cstash(s);
1561
1562         /* establish value for token */
1563         /* single character literal */
1564         if(s[0] == ' ') {
1565                 val = chartorune(&rune, &s[1]);
1566                 if(s[val+1] == 0) {
1567                         val = rune;
1568                         goto out;
1569                 }
1570         }
1571
1572         /* escape sequence */
1573         if(s[0] == ' ' && s[1] == '\\') {
1574                 if(s[3] == 0) {
1575                         /* single character escape sequence */
1576                         switch(s[2]) {
1577                         case 'n':       val = '\n'; break;
1578                         case 'r':       val = '\r'; break;
1579                         case 'b':       val = '\b'; break;
1580                         case 't':       val = '\t'; break;
1581                         case 'f':       val = '\f'; break;
1582                         case '\'':      val = '\''; break;
1583                         case '"':       val = '"'; break;
1584                         case '\\':      val = '\\'; break;
1585                         default:        error("invalid escape");
1586                         }
1587                         goto out;
1588                 }
1589
1590                 /* \nnn sequence */
1591                 if(s[2] >= '0' && s[2] <= '7') {
1592                         if(s[3] < '0' ||
1593                            s[3] > '7' ||
1594                            s[4] < '0' ||
1595                            s[4] > '7' ||
1596                            s[5] != 0)
1597                                 error("illegal \\nnn construction");
1598                         val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1599                         if(val == 0)
1600                                 error("'\\000' is illegal");
1601                         goto out;
1602                 }
1603                 error("unknown escape");
1604         }
1605         val = extval++;
1606
1607 out:
1608         tokset[ntokens].value = val;
1609         toklev[ntokens] = 0;
1610         return ntokens;
1611 }
1612
1613 /*
1614  * write out the defines (at the end of the declaration section)
1615  */
1616 void
1617 defout(int last)
1618 {
1619         int i, c;
1620         char sar[NAMESIZE+10];
1621
1622         for(i=ndefout; i<=ntokens; i++) {
1623                 /* non-literals */
1624                 c = tokset[i].name[0];
1625                 if(c != ' ' && c != '$') {
1626                         Bprint(ftable, "#define %s      %d\n",
1627                                 tokset[i].name, tokset[i].value);
1628                         if(fdefine)
1629                                 Bprint(fdefine, "#define\t%s\t%d\n",
1630                                         tokset[i].name, tokset[i].value);
1631                 }
1632         }
1633         ndefout = ntokens+1;
1634         if(last && fdebug) {
1635                 Bprint(fdebug, "char*   yytoknames[] =\n{\n");
1636                 TLOOP(i) {
1637                         if(tokset[i].name) {
1638                                 chcopy(sar, tokset[i].name);
1639                                 Bprint(fdebug, "\t\"%s\",\n", sar);
1640                                 continue;
1641                         }
1642                         Bprint(fdebug, "\t0,\n");
1643                 }
1644                 Bprint(fdebug, "};\n");
1645         }
1646 }
1647
1648 char*
1649 cstash(char *s)
1650 {
1651         char *temp;
1652
1653         temp = cnamp;
1654         do {
1655                 if(cnamp >= &cnames[cnamsz])
1656                         error("too many characters in id's and literals");
1657                 else
1658                         *cnamp++ = *s;
1659         } while(*s++);
1660         return temp;
1661 }
1662
1663 long
1664 gettok(void)
1665 {
1666         long c;
1667         Rune rune;
1668         int i, base, match, reserve;
1669         static int peekline;
1670
1671 begin:
1672         reserve = 0;
1673         lineno += peekline;
1674         peekline = 0;
1675         c = Bgetrune(finput);
1676         while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1677                 if(c == '\n')
1678                         lineno++;
1679                 c = Bgetrune(finput);
1680         }
1681
1682         /* skip comment */
1683         if(c == '/') {
1684                 lineno += skipcom();
1685                 goto begin;
1686         }
1687         switch(c) {
1688         case Beof:
1689                 return ENDFILE;
1690
1691         case '{':
1692                 Bungetrune(finput);
1693                 return '=';
1694
1695         case '<':
1696                 /* get, and look up, a type name (union member name) */
1697                 i = 0;
1698                 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1699                         rune = c;
1700                         c = runetochar(&tokname[i], &rune);
1701                         if(i < NAMESIZE)
1702                                 i += c;
1703                 }
1704                 if(c != '>')
1705                         error("unterminated < ... > clause");
1706                 tokname[i] = 0;
1707                 for(i=1; i<=ntypes; i++)
1708                         if(!strcmp(typeset[i], tokname)) {
1709                                 numbval = i;
1710                                 return TYPENAME;
1711                         }
1712                 ntypes++;
1713                 numbval = ntypes;
1714                 typeset[numbval] = cstash(tokname);
1715                 return TYPENAME;
1716
1717         case '"':
1718         case '\'':
1719                 match = c;
1720                 tokname[0] = ' ';
1721                 i = 1;
1722                 for(;;) {
1723                         c = Bgetrune(finput);
1724                         if(c == '\n' || c <= 0)
1725                                 error("illegal or missing ' or \"" );
1726                         if(c == '\\') {
1727                                 tokname[i] = '\\';
1728                                 if(i < NAMESIZE)
1729                                         i++;
1730                                 c = Bgetrune(finput);
1731                         } else
1732                                 if(c == match)
1733                                         break;
1734                         rune = c;
1735                         c = runetochar(&tokname[i], &rune);
1736                         if(i < NAMESIZE)
1737                                 i += c;
1738                 }
1739                 break;
1740
1741         case '%':
1742         case '\\':
1743                 switch(c = Bgetrune(finput)) {
1744                 case '0':       return TERM;
1745                 case '<':       return LEFT;
1746                 case '2':       return BINARY;
1747                 case '>':       return RIGHT;
1748                 case '%':
1749                 case '\\':      return MARK;
1750                 case '=':       return PREC;
1751                 case '{':       return LCURLY;
1752                 default:        reserve = 1;
1753                 }
1754
1755         default:
1756                 /* number */
1757                 if(isdigit(c)) {
1758                         numbval = c-'0';
1759                         base = (c=='0')? 8: 10;
1760                         for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1761                                 numbval = numbval*base + (c-'0');
1762                         Bungetrune(finput);
1763                         return NUMBER;
1764                 }
1765                 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1766                         i = 0;
1767                         while(islower(c) || isupper(c) || isdigit(c) ||
1768                             c == '-' || c=='_' || c=='.' || c=='$') {
1769                                 if(reserve && isupper(c))
1770                                         c += 'a'-'A';
1771                                 rune = c;
1772                                 c = runetochar(&tokname[i], &rune);
1773                                 if(i < NAMESIZE)
1774                                         i += c;
1775                                 c = Bgetrune(finput);
1776                         }
1777                 } else
1778                         return c;
1779                 Bungetrune(finput);
1780         }
1781         tokname[i] = 0;
1782
1783         /* find a reserved word */
1784         if(reserve) {
1785                 for(c=0; resrv[c].name; c++)
1786                         if(strcmp(tokname, resrv[c].name) == 0)
1787                                 return resrv[c].value;
1788                 error("invalid escape, or illegal reserved word: %s", tokname);
1789         }
1790
1791         /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1792         c = Bgetrune(finput);
1793         while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1794                 if(c == '\n')
1795                         peekline++;
1796                 /* look for comments */
1797                 if(c == '/')
1798                         peekline += skipcom();
1799                 c = Bgetrune(finput);
1800         }
1801         if(c == ':')
1802                 return IDENTCOLON;
1803         Bungetrune(finput);
1804         return IDENTIFIER;
1805 }
1806
1807 /*
1808  * determine the type of a symbol
1809  */
1810 int
1811 fdtype(int t)
1812 {
1813         int v;
1814
1815         if(t >= NTBASE)
1816                 v = nontrst[t-NTBASE].value;
1817         else
1818                 v = TYPE(toklev[t]);
1819         if(v <= 0)
1820                 error("must specify type for %s", (t>=NTBASE)?
1821                         nontrst[t-NTBASE].name: tokset[t].name);
1822         return v;
1823 }
1824
1825 int
1826 chfind(int t, char *s)
1827 {
1828         int i;
1829
1830         if(s[0] == ' ')
1831                 t = 0;
1832         TLOOP(i)
1833                 if(!strcmp(s, tokset[i].name))
1834                         return i;
1835         NTLOOP(i)
1836                 if(!strcmp(s, nontrst[i].name))
1837                         return NTBASE+i;
1838
1839         /* cannot find name */
1840         if(t > 1)
1841                 error("%s should have been defined earlier", s);
1842         return defin(t, s);
1843 }
1844
1845 /*
1846  * copy the union declaration to the output, and the define file if present
1847  */
1848 void
1849 cpyunion(void)
1850 {
1851         long c;
1852         int level;
1853
1854         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1855         Bprint(ftable, "typedef union ");
1856         if(fdefine != 0)
1857                 Bprint(fdefine, "\ntypedef union ");
1858
1859         level = 0;
1860         for(;;) {
1861                 if((c=Bgetrune(finput)) == Beof)
1862                         error("EOF encountered while processing %%union");
1863                 Bputrune(ftable, c);
1864                 if(fdefine != 0)
1865                         Bputrune(fdefine, c);
1866                 switch(c) {
1867                 case '\n':
1868                         lineno++;
1869                         break;
1870                 case '{':
1871                         level++;
1872                         break;
1873                 case '}':
1874                         level--;
1875
1876                         /* we are finished copying */
1877                         if(level == 0) {
1878                                 Bprint(ftable, " YYSTYPE;\n");
1879                                 if(fdefine != 0)
1880                                         Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1881                                 return;
1882                         }
1883                 }
1884         }
1885 }
1886
1887 /*
1888  * copies code between \{ and \}
1889  */
1890 void
1891 cpycode(void)
1892 {
1893
1894         long c;
1895
1896         c = Bgetrune(finput);
1897         if(c == '\n') {
1898                 c = Bgetrune(finput);
1899                 lineno++;
1900         }
1901         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1902         while(c != Beof) {
1903                 if(c == '\\') {
1904                         if((c=Bgetrune(finput)) == '}')
1905                                 return;
1906                         Bputc(ftable, '\\');
1907                 }
1908                 if(c == '%') {
1909                         if((c=Bgetrune(finput)) == '}')
1910                                 return;
1911                         Bputc(ftable, '%');
1912                 }
1913                 Bputrune(ftable, c);
1914                 if(c == '\n')
1915                         lineno++;
1916                 c = Bgetrune(finput);
1917         }
1918         error("eof before %%}");
1919 }
1920
1921 /*
1922  * skip over comments
1923  * skipcom is called after reading a '/'
1924  */
1925 int
1926 skipcom(void)
1927 {
1928         long c;
1929         int i;
1930
1931         /* i is the number of lines skipped */
1932         i = 0;
1933         c = Bgetrune(finput);
1934         if(c == '/'){                   /* C++ //: skip to end of line */
1935                 while((c = Bgetrune(finput)) != Beof)
1936                         if(c == '\n')
1937                                 return 1;
1938         }else if(c == '*'){             /* normal C comment */
1939                 while((c = Bgetrune(finput)) != Beof) {
1940                         while(c == '*')
1941                                 if((c = Bgetrune(finput)) == '/')
1942                                         return i;
1943                         if(c == '\n')
1944                                 i++;
1945                 }
1946         }else
1947                 error("illegal comment");
1948
1949         error("EOF inside comment");
1950         return 0;
1951 }
1952
1953 /*
1954  * copy C action to the next ; or closing }
1955  */
1956 void
1957 cpyact(int offset)
1958 {
1959         long c;
1960         int brac, match, j, s, fnd, tok;
1961
1962         Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, inpath);
1963         brac = 0;
1964
1965 loop:
1966         c = Bgetrune(finput);
1967 swt:
1968         switch(c) {
1969         case ';':
1970                 if(brac == 0) {
1971                         Bputrune(faction, c);
1972                         return;
1973                 }
1974                 goto lcopy;
1975
1976         case '{':
1977                 brac++;
1978                 goto lcopy;
1979
1980         case '$':
1981                 s = 1;
1982                 tok = -1;
1983                 c = Bgetrune(finput);
1984
1985                 /* type description */
1986                 if(c == '<') {
1987                         Bungetrune(finput);
1988                         if(gettok() != TYPENAME)
1989                                 error("bad syntax on $<ident> clause");
1990                         tok = numbval;
1991                         c = Bgetrune(finput);
1992                 }
1993                 if(c == '$') {
1994                         Bprint(faction, "yyval");
1995
1996                         /* put out the proper tag... */
1997                         if(ntypes) {
1998                                 if(tok < 0)
1999                                         tok = fdtype(*prdptr[nprod]);
2000                                 Bprint(faction, ".%s", typeset[tok]);
2001                         }
2002                         goto loop;
2003                 }
2004                 if(c == '-') {
2005                         s = -s;
2006                         c = Bgetrune(finput);
2007                 }
2008                 if(isdigit(c)) {
2009                         j = 0;
2010                         while(isdigit(c)) {
2011                                 j = j*10 + (c-'0');
2012                                 c = Bgetrune(finput);
2013                         }
2014                         Bungetrune(finput);
2015                         j = j*s - offset;
2016                         if(j > 0)
2017                                 error("Illegal use of $%d", j+offset);
2018
2019                 dollar:
2020                         Bprint(faction, "yypt[-%d].yyv", -j);
2021
2022                         /* put out the proper tag */
2023                         if(ntypes) {
2024                                 if(j+offset <= 0 && tok < 0)
2025                                         error("must specify type of $%d", j+offset);
2026                                 if(tok < 0)
2027                                         tok = fdtype(prdptr[nprod][j+offset]);
2028                                 Bprint(faction, ".%s", typeset[tok]);
2029                         }
2030                         goto loop;
2031                 }
2032                 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2033                         int tok; /* tok used oustide for type info */
2034
2035                         /* look for $name */
2036                         Bungetrune(finput);
2037                         if(gettok() != IDENTIFIER)
2038                                 error("$ must be followed by an identifier");
2039                         tok = chfind(2, tokname);
2040                         if((c = Bgetrune(finput)) != '#') {
2041                                 Bungetrune(finput);
2042                                 fnd = -1;
2043                         } else
2044                                 if(gettok() != NUMBER) {
2045                                         error("# must be followed by number");
2046                                         fnd = -1;
2047                                 } else
2048                                         fnd = numbval;
2049                         for(j=1; j<=offset; ++j)
2050                                 if(tok == prdptr[nprod][j]) {
2051                                         if(--fnd <= 0) {
2052                                                 j -= offset;
2053                                                 goto dollar;
2054                                         }
2055                                 }
2056                         error("$name or $name#number not found");
2057                 }
2058                 Bputc(faction, '$');
2059                 if(s < 0 )
2060                         Bputc(faction, '-');
2061                 goto swt;
2062
2063         case '}':
2064                 brac--;
2065                 if(brac)
2066                         goto lcopy;
2067                 Bputrune(faction, c);
2068                 return;
2069
2070         case '/':
2071                 /* look for comments */
2072                 Bputrune(faction, c);
2073                 c = Bgetrune(finput);
2074                 if(c != '*')
2075                         goto swt;
2076
2077                 /* it really is a comment */
2078                 Bputrune(faction, c);
2079                 c = Bgetrune(finput);
2080                 while(c >= 0) {
2081                         while(c == '*') {
2082                                 Bputrune(faction, c);
2083                                 if((c=Bgetrune(finput)) == '/')
2084                                         goto lcopy;
2085                         }
2086                         Bputrune(faction, c);
2087                         if(c == '\n')
2088                                 lineno++;
2089                         c = Bgetrune(finput);
2090                 }
2091                 error("EOF inside comment");
2092
2093         case '\'':
2094                 /* character constant */
2095                 match = '\'';
2096                 goto string;
2097
2098         case '"':
2099                 /* character string */
2100                 match = '"';
2101
2102         string:
2103                 Bputrune(faction, c);
2104                 while(c = Bgetrune(finput)) {
2105                         if(c == '\\') {
2106                                 Bputrune(faction, c);
2107                                 c = Bgetrune(finput);
2108                                 if(c == '\n')
2109                                         lineno++;
2110                         } else {
2111                                 if(c == match)
2112                                         goto lcopy;
2113                                 if(c == '\n')
2114                                         error("newline in string or char. const.");
2115                         }
2116                         Bputrune(faction, c);
2117                 }
2118                 error("EOF in string or character constant");
2119
2120         case Beof:
2121                 error("action does not terminate");
2122
2123         case '\n':
2124                 lineno++;
2125                 goto lcopy;
2126         }
2127
2128 lcopy:
2129         Bputrune(faction, c);
2130         goto loop;
2131 }
2132
2133 void
2134 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2135 {
2136         char buf[256];
2137
2138         if(vflag) {
2139                 snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2140                 foutput = Bopen(buf, OWRITE);
2141                 if(foutput == 0)
2142                         error("cannot open %s", buf);
2143                 Blethal(foutput, nil);
2144         }
2145         if(yydebug) {
2146                 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2147                 if((fdebug = Bopen(buf, OWRITE)) == 0)
2148                         error("can't open %s", buf);
2149         }
2150         if(dflag) {
2151                 snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2152                 fdefine = Bopen(buf, OWRITE);
2153                 if(fdefine == 0)
2154                         error("can't create %s", buf);
2155                 Blethal(fdefine, nil);
2156         }
2157         if(ytab == 0)
2158                 snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2159         else
2160                 strecpy(buf, buf+sizeof buf, ytabc);
2161         ftable = Bopen(buf, OWRITE);
2162         if(ftable == 0)
2163                 error("cannot open table file %s", buf);
2164         Blethal(ftable, nil);
2165 }
2166
2167 /*
2168  * print the output for the states
2169  */
2170 void
2171 output(void)
2172 {
2173         int i, k, c;
2174         Wset *u, *v;
2175
2176         Bprint(ftable, "short   yyexca[] =\n{");
2177         if(fdebug)
2178                 Bprint(fdebug, "char*   yystates[] =\n{\n");
2179
2180         /* output the stuff for state i */
2181         SLOOP(i) {
2182                 nolook = tystate[i]!=MUSTLOOKAHEAD;
2183                 closure(i);
2184
2185                 /* output actions */
2186                 nolook = 1;
2187                 aryfil(temp1, ntokens+nnonter+1, 0);
2188                 WSLOOP(wsets, u) {
2189                         c = *(u->pitem);
2190                         if(c > 1 && c < NTBASE && temp1[c] == 0) {
2191                                 WSLOOP(u, v)
2192                                         if(c == *(v->pitem))
2193                                                 putitem(v->pitem+1, (Lkset*)0);
2194                                 temp1[c] = state(c);
2195                         } else
2196                                 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2197                                         temp1[c+ntokens] = amem[indgo[i]+c];
2198                 }
2199                 if(i == 1)
2200                         temp1[1] = ACCEPTCODE;
2201
2202                 /* now, we have the shifts; look at the reductions */
2203                 lastred = 0;
2204                 WSLOOP(wsets, u) {
2205                         c = *u->pitem;
2206
2207                         /* reduction */
2208                         if(c <= 0) {
2209                                 lastred = -c;
2210                                 TLOOP(k)
2211                                         if(BIT(u->ws.lset, k)) {
2212                                                 if(temp1[k] == 0)
2213                                                         temp1[k] = c;
2214                                                 else
2215                                                 if(temp1[k] < 0) { /* reduce/reduce conflict */
2216                                                         if(foutput)
2217                                                                 Bprint(foutput,
2218                                                                         "\n%d: reduce/reduce conflict"
2219                                                                         " (red'ns %d and %d ) on %s",
2220                                                                         i, -temp1[k], lastred,
2221                                                                         symnam(k));
2222                                                         if(-temp1[k] > lastred)
2223                                                                 temp1[k] = -lastred;
2224                                                         zzrrconf++;
2225                                                 } else
2226                                                         /* potential shift/reduce conflict */
2227                                                         precftn( lastred, k, i );
2228                                         }
2229                                 }
2230                 }
2231                 wract(i);
2232         }
2233
2234         if(fdebug)
2235                 Bprint(fdebug, "};\n");
2236         Bprint(ftable, "};\n");
2237         Bprint(ftable, "#define YYNPROD %d\n", nprod);
2238         Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2239         if(yydebug)
2240                 Bprint(ftable, "#define yydebug %s\n", yydebug);
2241 }
2242
2243 /*
2244  * pack state i from temp1 into amem
2245  */
2246 int
2247 apack(int *p, int n)
2248 {
2249         int *pp, *qq, *rr, off, *q, *r;
2250
2251         /* we don't need to worry about checking because
2252          * we will only look at entries known to be there...
2253          * eliminate leading and trailing 0's
2254          */
2255
2256         q = p+n;
2257         for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2258                 ;
2259         /* no actions */
2260         if(pp > q)
2261                 return 0;
2262         p = pp;
2263
2264         /* now, find a place for the elements from p to q, inclusive */
2265         r = &amem[ACTSIZE-1];
2266         for(rr = amem; rr <= r; rr++, off++) {
2267                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2268                         if(*pp != 0)
2269                                 if(*pp != *qq && *qq != 0)
2270                                         goto nextk;
2271
2272                 /* we have found an acceptable k */
2273                 if(pkdebug && foutput != 0)
2274                         Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2275                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2276                         if(*pp) {
2277                                 if(qq > r)
2278                                         error("action table overflow");
2279                                 if(qq > memp)
2280                                         memp = qq;
2281                                 *qq = *pp;
2282                         }
2283                 if(pkdebug && foutput != 0)
2284                         for(pp = amem; pp <= memp; pp += 10) {
2285                                 Bprint(foutput, "\t");
2286                                 for(qq = pp; qq <= pp+9; qq++)
2287                                         Bprint(foutput, "%d ", *qq);
2288                                 Bprint(foutput, "\n");
2289                         }
2290                 return(off);
2291         nextk:;
2292         }
2293         error("no space in action table");
2294         return 0;
2295 }
2296
2297 /*
2298  * output the gotos for the nontermninals
2299  */
2300 void
2301 go2out(void)
2302 {
2303         int i, j, k, best, count, cbest, times;
2304
2305         /* mark begining of gotos */
2306         Bprint(ftemp, "$\n");
2307         for(i = 1; i <= nnonter; i++) {
2308                 go2gen(i);
2309
2310                 /* find the best one to make default */
2311                 best = -1;
2312                 times = 0;
2313
2314                 /* is j the most frequent */
2315                 for(j = 0; j <= nstate; j++) {
2316                         if(tystate[j] == 0)
2317                                 continue;
2318                         if(tystate[j] == best)
2319                                 continue;
2320
2321                         /* is tystate[j] the most frequent */
2322                         count = 0;
2323                         cbest = tystate[j];
2324                         for(k = j; k <= nstate; k++)
2325                                 if(tystate[k] == cbest)
2326                                         count++;
2327                         if(count > times) {
2328                                 best = cbest;
2329                                 times = count;
2330                         }
2331                 }
2332
2333                 /* best is now the default entry */
2334                 zzgobest += times-1;
2335                 for(j = 0; j <= nstate; j++)
2336                         if(tystate[j] != 0 && tystate[j] != best) {
2337                                 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2338                                 zzgoent++;
2339                         }
2340
2341                 /* now, the default */
2342                 if(best == -1)
2343                         best = 0;
2344                 zzgoent++;
2345                 Bprint(ftemp, "%d\n", best);
2346         }
2347 }
2348
2349 /*
2350  * output the gotos for nonterminal c
2351  */
2352 void
2353 go2gen(int c)
2354 {
2355         int i, work, cc;
2356         Item *p, *q;
2357
2358
2359         /* first, find nonterminals with gotos on c */
2360         aryfil(temp1, nnonter+1, 0);
2361         temp1[c] = 1;
2362         work = 1;
2363         while(work) {
2364                 work = 0;
2365                 PLOOP(0, i)
2366
2367                         /* cc is a nonterminal */
2368                         if((cc=prdptr[i][1]-NTBASE) >= 0)
2369                                 /* cc has a goto on c */
2370                                 if(temp1[cc] != 0) {
2371
2372                                         /* thus, the left side of production i does too */
2373                                         cc = *prdptr[i]-NTBASE;
2374                                         if(temp1[cc] == 0) {
2375                                                   work = 1;
2376                                                   temp1[cc] = 1;
2377                                         }
2378                                 }
2379         }
2380
2381         /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2382         if(g2debug && foutput != 0) {
2383                 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2384                 NTLOOP(i)
2385                         if(temp1[i])
2386                                 Bprint(foutput, "%s ", nontrst[i].name);
2387                 Bprint(foutput, "\n");
2388         }
2389
2390         /* now, go through and put gotos into tystate */
2391         aryfil(tystate, nstate, 0);
2392         SLOOP(i)
2393                 ITMLOOP(i, p, q)
2394                         if((cc = *p->pitem) >= NTBASE)
2395                                 /* goto on c is possible */
2396                                 if(temp1[cc-NTBASE]) {
2397                                         tystate[i] = amem[indgo[i]+c];
2398                                         break;
2399                                 }
2400 }
2401
2402 /*
2403  * decide a shift/reduce conflict by precedence.
2404  * r is a rule number, t a token number
2405  * the conflict is in state s
2406  * temp1[t] is changed to reflect the action
2407  */
2408 void
2409 precftn(int r, int t, int s)
2410 {
2411         int lp, lt, action;
2412
2413         lp = levprd[r];
2414         lt = toklev[t];
2415         if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2416
2417                 /* conflict */
2418                 if(foutput != 0)
2419                         Bprint(foutput,
2420                                 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2421                                 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2422                 zzsrconf++;
2423                 return;
2424         }
2425         if(PLEVEL(lt) == PLEVEL(lp))
2426                 action = ASSOC(lt);
2427         else
2428                 if(PLEVEL(lt) > PLEVEL(lp))
2429                         action = RASC;  /* shift */
2430                 else
2431                         action = LASC;  /* reduce */
2432         switch(action) {
2433         case BASC:  /* error action */
2434                 temp1[t] = ERRCODE;
2435                 break;
2436         case LASC:  /* reduce */
2437                 temp1[t] = -r;
2438                 break;
2439         }
2440 }
2441
2442 /*
2443  * output state i
2444  * temp1 has the actions, lastred the default
2445  */
2446 void
2447 wract(int i)
2448 {
2449         int p, p0, p1, ntimes, tred, count, j, flag;
2450
2451         /* find the best choice for lastred */
2452         lastred = 0;
2453         ntimes = 0;
2454         TLOOP(j) {
2455                 if(temp1[j] >= 0)
2456                         continue;
2457                 if(temp1[j]+lastred == 0)
2458                         continue;
2459                 /* count the number of appearances of temp1[j] */
2460                 count = 0;
2461                 tred = -temp1[j];
2462                 levprd[tred] |= REDFLAG;
2463                 TLOOP(p)
2464                         if(temp1[p]+tred == 0)
2465                                 count++;
2466                 if(count > ntimes) {
2467                         lastred = tred;
2468                         ntimes = count;
2469                 }
2470         }
2471
2472         /*
2473          * for error recovery, arrange that, if there is a shift on the
2474          * error recovery token, `error', that the default be the error action
2475          */
2476         if(temp1[2] > 0)
2477                 lastred = 0;
2478
2479         /* clear out entries in temp1 which equal lastred */
2480         TLOOP(p)
2481                 if(temp1[p]+lastred == 0)
2482                         temp1[p] = 0;
2483
2484         wrstate(i);
2485         defact[i] = lastred;
2486         flag = 0;
2487         TLOOP(p0)
2488                 if((p1=temp1[p0]) != 0) {
2489                         if(p1 < 0) {
2490                                 p1 = -p1;
2491                                 goto exc;
2492                         }
2493                         if(p1 == ACCEPTCODE) {
2494                                 p1 = -1;
2495                                 goto exc;
2496                         }
2497                         if(p1 == ERRCODE) {
2498                                 p1 = 0;
2499                         exc:
2500                                 if(flag++ == 0)
2501                                         Bprint(ftable, "-1, %d,\n", i);
2502                                 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2503                                 zzexcp++;
2504                                 continue;
2505                         }
2506                         Bprint(ftemp, "%d,%d,", p0, p1);
2507                         zzacent++;
2508                 }
2509         if(flag) {
2510                 defact[i] = -2;
2511                 Bprint(ftable, "\t-2, %d,\n", lastred);
2512         }
2513         Bprint(ftemp, "\n");
2514 }
2515
2516 /*
2517  * writes state i
2518  */
2519 void
2520 wrstate(int i)
2521 {
2522         int j0, j1;
2523         Item *pp, *qq;
2524         Wset *u;
2525
2526         if(fdebug) {
2527                 if(lastred) {
2528                         Bprint(fdebug, "        0, /*%d*/\n", i);
2529                 } else {
2530                         Bprint(fdebug, "        \"");
2531                         ITMLOOP(i, pp, qq)
2532                                 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2533                         if(tystate[i] == MUSTLOOKAHEAD)
2534                                 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2535                                         if(*u->pitem < 0)
2536                                                 Bprint(fdebug, "%s\\n", writem(u->pitem));
2537                         Bprint(fdebug, "\", /*%d*/\n", i);
2538                 }
2539         }
2540         if(foutput == 0)
2541                 return;
2542         Bprint(foutput, "\nstate %d\n", i);
2543         ITMLOOP(i, pp, qq)
2544                 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2545         if(tystate[i] == MUSTLOOKAHEAD)
2546                 /* print out empty productions in closure */
2547                 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2548                         if(*u->pitem < 0)
2549                                 Bprint(foutput, "\t%s\n", writem(u->pitem));
2550
2551         /* check for state equal to another */
2552         TLOOP(j0)
2553                 if((j1=temp1[j0]) != 0) {
2554                         Bprint(foutput, "\n\t%s  ", symnam(j0));
2555                         /* shift, error, or accept */
2556                         if(j1 > 0) {
2557                                 if(j1 == ACCEPTCODE)
2558                                         Bprint(foutput,  "accept");
2559                                 else
2560                                         if(j1 == ERRCODE)
2561                                                 Bprint(foutput, "error");
2562                                         else
2563                                                 Bprint(foutput, "shift %d", j1);
2564                         } else
2565                                 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2566                 }
2567
2568         /* output the final production */
2569         if(lastred)
2570                 Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2571                         lastred, rlines[lastred]);
2572         else
2573                 Bprint(foutput, "\n\t.  error\n\n");
2574
2575         /* now, output nonterminal actions */
2576         j1 = ntokens;
2577         for(j0 = 1; j0 <= nnonter; j0++) {
2578                 j1++;
2579                 if(temp1[j1])
2580                         Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2581         }
2582 }
2583
2584 void
2585 warray(char *s, int *v, int n)
2586 {
2587         int i;
2588
2589         Bprint(ftable, "short   %s[] =\n{", s);
2590         for(i=0;;) {
2591                 if(i%10 == 0)
2592                         Bprint(ftable, "\n");
2593                 Bprint(ftable, "%4d", v[i]);
2594                 i++;
2595                 if(i >= n) {
2596                         Bprint(ftable, "\n};\n");
2597                         break;
2598                 }
2599                 Bprint(ftable, ",");
2600         }
2601 }
2602
2603 /*
2604  * in order to free up the mem and amem arrays for the optimizer,
2605  * and still be able to output yyr1, etc., after the sizes of
2606  * the action array is known, we hide the nonterminals
2607  * derived by productions in levprd.
2608  */
2609
2610 void
2611 hideprod(void)
2612 {
2613         int i, j;
2614
2615         j = 0;
2616         levprd[0] = 0;
2617         PLOOP(1, i) {
2618                 if(!(levprd[i] & REDFLAG)) {
2619                         j++;
2620                         if(foutput != 0)
2621                                 Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2622                 }
2623                 levprd[i] = *prdptr[i] - NTBASE;
2624         }
2625         if(j)
2626                 print("%d rules never reduced\n", j);
2627 }
2628
2629 void
2630 callopt(void)
2631 {
2632         int i, *p, j, k, *q;
2633
2634         /* read the arrays from tempfile and set parameters */
2635         finput = Bopen(tempname, OREAD);
2636         if(finput == 0)
2637                 error("optimizer cannot open tempfile");
2638         Blethal(finput, nil);
2639
2640         pgo[0] = 0;
2641         temp1[0] = 0;
2642         nstate = 0;
2643         nnonter = 0;
2644         for(;;) {
2645                 switch(gtnm()) {
2646                 case '\n':
2647                         nstate++;
2648                         pmem--;
2649                         temp1[nstate] = pmem - mem0;
2650                 case ',':
2651                         continue;
2652                 case '$':
2653                         break;
2654                 default:
2655                         error("bad tempfile");
2656                 }
2657                 break;
2658         }
2659
2660         pmem--;
2661         temp1[nstate] = yypgo[0] = pmem - mem0;
2662         for(;;) {
2663                 switch(gtnm()) {
2664                 case '\n':
2665                         nnonter++;
2666                         yypgo[nnonter] = pmem-mem0;
2667                 case ',':
2668                         continue;
2669                 case -1:
2670                         break;
2671                 default:
2672                         error("bad tempfile");
2673                 }
2674                 break;
2675         }
2676         pmem--;
2677         yypgo[nnonter--] = pmem - mem0;
2678         for(i = 0; i < nstate; i++) {
2679                 k = 32000;
2680                 j = 0;
2681                 q = mem0 + temp1[i+1];
2682                 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2683                         if(*p > j)
2684                                 j = *p;
2685                         if(*p < k)
2686                                 k = *p;
2687                 }
2688                 /* nontrivial situation */
2689                 if(k <= j) {
2690                         /* j is now the range */
2691 /*                      j -= k;                 /* call scj */
2692                         if(k > maxoff)
2693                                 maxoff = k;
2694                 }
2695                 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2696                 if(j > maxspr)
2697                         maxspr = j;
2698         }
2699
2700         /* initialize ggreed table */
2701         for(i = 1; i <= nnonter; i++) {
2702                 ggreed[i] = 1;
2703                 j = 0;
2704
2705                 /* minimum entry index is always 0 */
2706                 q = mem0 + yypgo[i+1] - 1;
2707                 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2708                         ggreed[i] += 2;
2709                         if(*p > j)
2710                                 j = *p;
2711                 }
2712                 ggreed[i] = ggreed[i] + 2*j;
2713                 if(j > maxoff)
2714                         maxoff = j;
2715         }
2716
2717         /* now, prepare to put the shift actions into the amem array */
2718         for(i = 0; i < ACTSIZE; i++)
2719                 amem[i] = 0;
2720         maxa = amem;
2721         for(i = 0; i < nstate; i++) {
2722                 if(tystate[i] == 0 && adb > 1)
2723                         Bprint(ftable, "State %d: null\n", i);
2724                 indgo[i] = YYFLAG1;
2725         }
2726         while((i = nxti()) != NOMORE)
2727                 if(i >= 0)
2728                         stin(i);
2729                 else
2730                         gin(-i);
2731
2732         /* print amem array */
2733         if(adb > 2 )
2734                 for(p = amem; p <= maxa; p += 10) {
2735                         Bprint(ftable, "%4d  ", (int)(p-amem));
2736                         for(i = 0; i < 10; ++i)
2737                                 Bprint(ftable, "%4d  ", p[i]);
2738                         Bprint(ftable, "\n");
2739                 }
2740
2741         /* write out the output appropriate to the language */
2742         aoutput();
2743         osummary();
2744         ZAPFILE(ftemp, tempname);
2745 }
2746
2747 void
2748 gin(int i)
2749 {
2750         int *p, *r, *s, *q1, *q2;
2751
2752         /* enter gotos on nonterminal i into array amem */
2753         ggreed[i] = 0;
2754
2755         q2 = mem0+ yypgo[i+1] - 1;
2756         q1 = mem0 + yypgo[i];
2757
2758         /* now, find amem place for it */
2759         for(p = amem; p < &amem[ACTSIZE]; p++) {
2760                 if(*p)
2761                         continue;
2762                 for(r = q1; r < q2; r += 2) {
2763                         s = p + *r + 1;
2764                         if(*s)
2765                                 goto nextgp;
2766                         if(s > maxa)
2767                                 if((maxa = s) > &amem[ACTSIZE])
2768                                         error("a array overflow");
2769                 }
2770                 /* we have found amem spot */
2771                 *p = *q2;
2772                 if(p > maxa)
2773                         if((maxa = p) > &amem[ACTSIZE])
2774                                 error("a array overflow");
2775                 for(r = q1; r < q2; r += 2) {
2776                         s = p + *r + 1;
2777                         *s = r[1];
2778                 }
2779                 pgo[i] = p-amem;
2780                 if(adb > 1)
2781                         Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2782                 return;
2783
2784         nextgp:;
2785         }
2786         error("cannot place goto %d\n", i);
2787 }
2788
2789 void
2790 stin(int i)
2791 {
2792         int *r, *s, n, flag, j, *q1, *q2;
2793
2794         tystate[i] = 0;
2795
2796         /* enter state i into the amem array */
2797         q2 = mem0+temp1[i+1];
2798         q1 = mem0+temp1[i];
2799         /* find an acceptable place */
2800         for(n = -maxoff; n < ACTSIZE; n++) {
2801                 flag = 0;
2802                 for(r = q1; r < q2; r += 2) {
2803                         if((s = *r + n + amem) < amem)
2804                                 goto nextn;
2805                         if(*s == 0)
2806                                 flag++;
2807                         else
2808                                 if(*s != r[1])
2809                                         goto nextn;
2810                 }
2811
2812                 /* check that the position equals another only if the states are identical */
2813                 for(j=0; j<nstate; j++) {
2814                         if(indgo[j] == n) {
2815
2816                                 /* we have some disagreement */
2817                                 if(flag)
2818                                         goto nextn;
2819                                 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2820
2821                                         /* states are equal */
2822                                         indgo[i] = n;
2823                                         if(adb > 1)
2824                                                 Bprint(ftable,
2825                                                 "State %d: entry at %d equals state %d\n",
2826                                                 i, n, j);
2827                                         return;
2828                                 }
2829
2830                                 /* we have some disagreement */
2831                                 goto nextn;
2832                         }
2833                 }
2834
2835                 for(r = q1; r < q2; r += 2) {
2836                         if((s = *r+n+amem) >= &amem[ACTSIZE])
2837                                 error("out of space in optimizer a array");
2838                         if(s > maxa)
2839                                 maxa = s;
2840                         if(*s != 0 && *s != r[1])
2841                                 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2842                         *s = r[1];
2843                 }
2844                 indgo[i] = n;
2845                 if(adb > 1)
2846                         Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2847                 return;
2848         nextn:;
2849         }
2850         error("Error; failure to place state %d\n", i);
2851 }
2852
2853 /*
2854  * finds the next i
2855  */
2856 int
2857 nxti(void)
2858 {
2859         int i, max, maxi;
2860
2861         max = 0;
2862         maxi = 0;
2863         for(i = 1; i <= nnonter; i++)
2864                 if(ggreed[i] >= max) {
2865                         max = ggreed[i];
2866                         maxi = -i;
2867                 }
2868         for(i = 0; i < nstate; ++i)
2869                 if(tystate[i] >= max) {
2870                         max = tystate[i];
2871                         maxi = i;
2872                 }
2873         if(nxdb)
2874                 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2875         if(max == 0)
2876                 return NOMORE;
2877         return maxi;
2878 }
2879
2880 /*
2881  * write summary
2882  */
2883 void
2884 osummary(void)
2885 {
2886
2887         int i, *p;
2888
2889         if(foutput == 0)
2890                 return;
2891         i = 0;
2892         for(p = maxa; p >= amem; p--)
2893                 if(*p == 0)
2894                         i++;
2895
2896         Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2897                 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2898         Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2899         Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2900 }
2901
2902 /*
2903  * this version is for C
2904  * write out the optimized parser
2905  */
2906 void
2907 aoutput(void)
2908 {
2909         Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2910         arout("yyact", amem, (maxa-amem)+1);
2911         arout("yypact", indgo, nstate);
2912         arout("yypgo", pgo, nnonter+1);
2913 }
2914
2915 void
2916 arout(char *s, int *v, int n)
2917 {
2918         int i;
2919
2920         Bprint(ftable, "short   %s[] =\n{", s);
2921         for(i = 0; i < n;) {
2922                 if(i%10 == 0)
2923                         Bprint(ftable, "\n");
2924                 Bprint(ftable, "%4d", v[i]);
2925                 i++;
2926                 if(i == n)
2927                         Bprint(ftable, "\n};\n");
2928                 else
2929                         Bprint(ftable, ",");
2930         }
2931 }
2932
2933 /*
2934  * read and convert an integer from the standard input
2935  * return the terminating character
2936  * blanks, tabs, and newlines are ignored
2937  */
2938 int
2939 gtnm(void)
2940 {
2941         int sign, val, c;
2942
2943         sign = 0;
2944         val = 0;
2945         while((c=Bgetrune(finput)) != Beof) {
2946                 if(isdigit(c)) {
2947                         val = val*10 + c-'0';
2948                         continue;
2949                 }
2950                 if(c == '-') {
2951                         sign = 1;
2952                         continue;
2953                 }
2954                 break;
2955         }
2956         if(sign)
2957                 val = -val;
2958         *pmem++ = val;
2959         if(pmem >= &mem0[MEMSIZE])
2960                 error("out of space");
2961         return c;
2962 }