]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/yacc.c
python: update python build configuration to new ape capabilities like getaddrinfo...
[plan9front.git] / sys / src / cmd / yacc.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5
6 #define Bungetrune      Bungetc         /* ok for now. */
7
8 /*
9  * all these are 32 bit
10  */
11 #define TBITSET         ((32+NTERMS)/32)        /* BOTCH?? +31 */
12 #define BIT(a,i)        ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i)     ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n)       (((n)+32)/32)
15
16 #define PARSER          "/sys/lib/yaccpar"
17 #define PARSERS         "/sys/lib/yaccpars"
18 #define TEMPNAME        "y.tmp.XXXXXX"
19 #define ACTNAME         "y.acts.XXXXXX"
20 #define OFILE           "tab.c"
21 #define FILEU           "output"
22 #define FILED           "tab.h"
23 #define FILEDEBUG       "debug"
24
25 enum
26 {
27 /*
28  * the following are adjustable
29  * according to memory size
30  */
31         ACTSIZE         = 40000,
32         MEMSIZE         = 40000,
33         NSTATES         = 2000,
34         NTERMS          = 511,
35         NPROD           = 1600,
36         NNONTERM        = 600,
37         TEMPSIZE        = 2000,
38         CNAMSZ          = 10000,
39         LSETSIZE        = 2400,
40         WSETSIZE        = 350,
41
42         NAMESIZE        = 50,
43         NTYPES          = 63,
44         ISIZE           = 400,
45
46         PRIVATE         = 0xE000,       /* unicode private use */
47
48         /* relationships which must hold:
49                 TBITSET ints must hold NTERMS+1 bits...
50                 WSETSIZE >= NNONTERM
51                 LSETSIZE >= NNONTERM
52                 TEMPSIZE >= NTERMS + NNONTERM + 1
53                 TEMPSIZE >= NSTATES
54         */
55
56         NTBASE          = 010000,
57         ERRCODE         = 8190,
58         ACCEPTCODE      = 8191,
59
60         NOASC           = 0,    /* no assoc. */
61         LASC            = 1,    /* left assoc. */
62         RASC            = 2,    /* right assoc. */
63         BASC            = 3,    /* binary assoc. */
64
65         /* flags for state generation */
66
67         DONE            = 0,
68         MUSTDO          = 1,
69         MUSTLOOKAHEAD   = 2,
70
71         /* flags for a rule having an action, and being reduced */
72
73         ACTFLAG         = 04,
74         REDFLAG         = 010,
75
76         /* output parser flags */
77         YYFLAG1         = -1000,
78
79         /* parse tokens */
80         IDENTIFIER      = PRIVATE,
81         MARK,
82         TERM,
83         LEFT,
84         RIGHT,
85         BINARY,
86         PREC,
87         LCURLY,
88         IDENTCOLON,
89         NUMBER,
90         START,
91         TYPEDEF,
92         TYPENAME,
93         UNION,
94
95         ENDFILE         = 0,
96
97         EMPTY           = 1,
98         WHOKNOWS        = 0,
99         OK              = 1,
100         NOMORE          = -1000,
101 };
102
103         /* macros for getting associativity and precedence levels */
104
105 #define ASSOC(i)        ((i)&03)
106 #define PLEVEL(i)       (((i)>>4)&077)
107 #define TYPE(i)         (((i)>>10)&077)
108
109         /* macros for setting associativity and precedence levels */
110
111 #define SETASC(i,j)     i |= j
112 #define SETPLEV(i,j)    i |= (j<<4)
113 #define SETTYPE(i,j)    i |= (j<<10)
114
115         /* looping macros */
116
117 #define TLOOP(i)        for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i)       for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i)      for(i=s; i<nprod; i++)
120 #define SLOOP(i)        for(i=0; i<nstate; i++)
121 #define WSBUMP(x)       x++
122 #define WSLOOP(s,j)     for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q)  for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i)      for(i=0; i<tbitset; i++)
125
126         /* command to clobber tempfiles after use */
127
128 #define ZAPFILE(x)      if(x) remove(x)
129
130         /* I/O descriptors */
131
132 Biobuf* faction;        /* file for saving actions */
133 Biobuf* fdefine;        /* file for #defines */
134 Biobuf* fdebug;         /* y.debug for strings for debugging */
135 Biobuf* ftable;         /* y.tab.c file */
136 Biobuf* ftemp;          /* tempfile to pass 2 */
137 Biobuf* finput;         /* input file */
138 Biobuf* foutput;        /* y.output file */
139
140         /* communication variables between various I/O routines */
141
142 char*   infile;                 /* input file name */
143 int     numbval;                /* value of an input number */
144 char    tokname[NAMESIZE+UTFmax+1];     /* input token name, slop for runes and 0 */
145
146         /* structure declarations */
147
148 typedef
149 struct
150 {
151         int     lset[TBITSET];
152 } Lkset;
153
154 typedef
155 struct
156 {
157         int*    pitem;
158         Lkset*  look;
159 } Item;
160
161 typedef
162 struct
163 {
164         char*   name;
165         int     value;
166 } Symb;
167
168 typedef
169 struct
170 {
171         int*    pitem;
172         int     flag;
173         Lkset   ws;
174 } Wset;
175
176         /* storage of names */
177
178 char    cnames[CNAMSZ];         /* place where token and nonterminal names are stored */
179 int     cnamsz = CNAMSZ;        /* size of cnames */
180 char*   cnamp = cnames;         /* place where next name is to be put in */
181 int     ndefout = 4;            /* number of defined symbols output */
182 char*   tempname;
183 char*   actname;
184 char    ttempname[] = TEMPNAME;
185 char    tactname[] = ACTNAME;
186 char*   parser = PARSER;
187 char*   yydebug;
188
189         /* storage of types */
190 int     ntypes;                 /* number of types defined */
191 char*   typeset[NTYPES];        /* pointers to type tags */
192
193         /* token information */
194
195 int     ntokens = 0 ;           /* number of tokens */
196 Symb    tokset[NTERMS];
197 int     toklev[NTERMS];         /* vector with the precedence of the terminals */
198
199         /* nonterminal information */
200
201 int     nnonter = -1;           /* the number of nonterminals */
202 Symb    nontrst[NNONTERM];
203 int     start;                  /* start symbol */
204
205         /* assigned token type values */
206 int     extval = 0;
207
208 char*   ytabc = OFILE;  /* name of y.tab.c */
209
210         /* grammar rule information */
211
212 int     mem0[MEMSIZE] ;         /* production storage */
213 int*    mem = mem0;
214 int     nprod = 1;              /* number of productions */
215 int*    prdptr[NPROD];          /* pointers to descriptions of productions */
216 int     levprd[NPROD];          /* precedence levels for the productions */
217 int     rlines[NPROD];          /* line number for this rule */
218
219         /* state information */
220
221 int     nstate = 0;             /* number of states */
222 Item*   pstate[NSTATES+2];      /* pointers to the descriptions of the states */
223 int     tystate[NSTATES];       /* contains type information about the states */
224 int     defact[NSTATES];        /* the default actions of states */
225 int     tstates[NTERMS];        /* states generated by terminal gotos */
226 int     ntstates[NNONTERM];     /* states generated by nonterminal gotos */
227 int     mstates[NSTATES];       /* chain of overflows of term/nonterm generation lists  */
228 int     lastred;                /* the number of the last reduction of a state */
229
230         /* lookahead set information */
231
232 Lkset   lkst[LSETSIZE];
233 int     nolook;                 /* flag to turn off lookahead computations */
234 int     tbitset;                /* size of lookahead sets */
235 int     nlset = 0;              /* next lookahead set index */
236 int     nolook = 0;             /* flag to suppress lookahead computations */
237 Lkset   clset;                  /* temporary storage for lookahead computations */
238
239         /* working set information */
240
241 Wset    wsets[WSETSIZE];
242 Wset*   cwp;
243
244         /* storage for action table */
245
246 int     amem[ACTSIZE];          /* action table storage */
247 int*    memp = amem;            /* next free action table position */
248 int     indgo[NSTATES];         /* index to the stored goto table */
249
250         /* temporary vector, indexable by states, terms, or ntokens */
251
252 int     temp1[TEMPSIZE];        /* temporary storage, indexed by terms + ntokens or states */
253 int     lineno = 1;             /* current input line number */
254 int     fatfl = 1;              /* if on, error is fatal */
255 int     nerrors = 0;            /* number of errors */
256
257         /* statistics collection variables */
258
259 int     zzgoent;
260 int     zzgobest;
261 int     zzacent;
262 int     zzexcp;
263 int     zzclose;
264 int     zzrrconf;
265 int     zzsrconf;
266
267 int*    ggreed = lkst[0].lset;
268 int*    pgo = wsets[0].ws.lset;
269 int*    yypgo = &nontrst[0].value;
270
271 int     maxspr = 0;             /* maximum spread of any entry */
272 int     maxoff = 0;             /* maximum offset into a array */
273 int*    pmem = mem0;
274 int*    maxa;
275 int     nxdb = 0;
276 int     adb = 0;
277
278
279         /* storage for information about the nonterminals */
280
281 int**   pres[NNONTERM+2];       /* vector of pointers to productions yielding each nonterminal */
282 Lkset*  pfirst[NNONTERM+2];     /* vector of pointers to first sets for each nonterminal */
283 int     pempty[NNONTERM+1];     /* vector of nonterminals nontrivially deriving e */
284
285         /* random stuff picked out from between functions */
286
287 int     indebug = 0;
288 Wset*   zzcwp = wsets;
289 int     zzgoent = 0;
290 int     zzgobest = 0;
291 int     zzacent = 0;
292 int     zzexcp = 0;
293 int     zzclose = 0;
294 int     zzsrconf = 0;
295 int*    zzmemsz = mem0;
296 int     zzrrconf = 0;
297 int     pidebug = 0;            /* debugging flag for putitem */
298 int     gsdebug = 0;
299 int     cldebug = 0;            /* debugging flag for closure */
300 int     pkdebug = 0;
301 int     g2debug = 0;
302
303 struct
304 {
305         char*   name;
306         long    value;
307 } resrv[] =
308 {
309         "binary",       BINARY,
310         "left",         LEFT,
311         "nonassoc",     BINARY,
312         "prec",         PREC,
313         "right",        RIGHT,
314         "start",        START,
315         "term",         TERM,
316         "token",        TERM,
317         "type",         TYPEDEF,
318         "union",        UNION,
319         0,
320 };
321
322         /* define functions */
323
324 void    main(int, char**);
325 void    others(void);
326 char*   chcopy(char*, char*);
327 char*   writem(int*);
328 char*   symnam(int);
329 void    summary(void);
330 void    error(char*, ...);
331 void    aryfil(int*, int, int);
332 int     setunion(int*, int*);
333 void    prlook(Lkset*);
334 void    cpres(void);
335 void    cpfir(void);
336 int     state(int);
337 void    putitem(int*, Lkset*);
338 void    cempty(void);
339 void    stagen(void);
340 void    closure(int);
341 Lkset*  flset(Lkset*);
342 void    cleantmp(void);
343 void    intr(void);
344 void    setup(int, char**);
345 void    finact(void);
346 int     defin(int, char*);
347 void    defout(int);
348 char*   cstash(char*);
349 long    gettok(void);
350 int     fdtype(int);
351 int     chfind(int, char*);
352 void    cpyunion(void);
353 void    cpycode(void);
354 int     skipcom(void);
355 void    cpyact(int);
356 void    openup(char*, int, int, int, char*);
357 void    output(void);
358 int     apack(int*, int);
359 void    go2out(void);
360 void    go2gen(int);
361 void    precftn(int, int, int);
362 void    wract(int);
363 void    wrstate(int);
364 void    warray(char*, int*, int);
365 void    hideprod(void);
366 void    callopt(void);
367 void    gin(int);
368 void    stin(int);
369 int     nxti(void);
370 void    osummary(void);
371 void    aoutput(void);
372 void    arout(char*, int*, int);
373 int     gtnm(void);
374
375 void
376 main(int argc, char *argv[])
377 {
378
379         setup(argc, argv);      /* initialize and read productions */
380         tbitset = NWORDS(ntokens);
381         cpres();                /* make table of which productions yield a given nonterminal */
382         cempty();               /* make a table of which nonterminals can match the empty string */
383         cpfir();                /* make a table of firsts of nonterminals */
384         stagen();               /* generate the states */
385         output();               /* write the states and the tables */
386         go2out();
387         hideprod();
388         summary();
389         callopt();
390         others();
391         exits(0);
392 }
393
394 /*
395  * put out other arrays, copy the parsers
396  */
397 void
398 others(void)
399 {
400         int c, i, j;
401
402         finput = Bopen(parser, OREAD);
403         if(finput == 0)
404                 error("cannot find parser %s", parser);
405         Blethal(finput, nil);
406         warray("yyr1", levprd, nprod);
407         aryfil(temp1, nprod, 0);
408         PLOOP(1, i)
409                 temp1[i] = prdptr[i+1]-prdptr[i]-2;
410         warray("yyr2", temp1, nprod);
411
412         aryfil(temp1, nstate, -1000);
413         TLOOP(i)
414                 for(j=tstates[i]; j!=0; j=mstates[j])
415                         temp1[j] = i;
416         NTLOOP(i)
417                 for(j=ntstates[i]; j!=0; j=mstates[j])
418                         temp1[j] = -i;
419         warray("yychk", temp1, nstate);
420         warray("yydef", defact, nstate);
421
422         /* put out token translation tables */
423         /* table 1 has 0-256 */
424         aryfil(temp1, 256, 0);
425         c = 0;
426         TLOOP(i) {
427                 j = tokset[i].value;
428                 if(j >= 0 && j < 256) {
429                         if(temp1[j]) {
430                                 print("yacc bug -- cant have 2 different Ts with same value\n");
431                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
432                                 nerrors++;
433                         }
434                         temp1[j] = i;
435                         if(j > c)
436                                 c = j;
437                 }
438         }
439         warray("yytok1", temp1, c+1);
440
441         /* table 2 has PRIVATE-PRIVATE+256 */
442         aryfil(temp1, 256, 0);
443         c = 0;
444         TLOOP(i) {
445                 j = tokset[i].value - PRIVATE;
446                 if(j >= 0 && j < 256) {
447                         if(temp1[j]) {
448                                 print("yacc bug -- cant have 2 different Ts with same value\n");
449                                 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
450                                 nerrors++;
451                         }
452                         temp1[j] = i;
453                         if(j > c)
454                                 c = j;
455                 }
456         }
457         warray("yytok2", temp1, c+1);
458
459         /* table 3 has everything else */
460         Bprint(ftable, "long    yytok3[] =\n{\n");
461         c = 0;
462         TLOOP(i) {
463                 j = tokset[i].value;
464                 if(j >= 0 && j < 256)
465                         continue;
466                 if(j >= PRIVATE && j < 256+PRIVATE)
467                         continue;
468
469                 Bprint(ftable, "%4d,%4d,", j, i);
470                 c++;
471                 if(c%5 == 0)
472                         Bprint(ftable, "\n");
473         }
474         Bprint(ftable, "%4d\n};\n", 0);
475
476         /* copy parser text */
477         while((c=Bgetrune(finput)) != Beof) {
478                 if(c == '$') {
479                         if((c = Bgetrune(finput)) != 'A')
480                                 Bputrune(ftable, '$');
481                         else { /* copy actions */
482                                 faction = Bopen(actname, OREAD);
483                                 if(faction == 0)
484                                         error("cannot reopen action tempfile");
485                                 Blethal(faction, nil);
486                                 while((c=Bgetrune(faction)) != Beof)
487                                         Bputrune(ftable, c);
488                                 Bterm(faction);
489                                 ZAPFILE(actname);
490                                 c = Bgetrune(finput);
491                         }
492                 }
493                 Bputrune(ftable, c);
494         }
495         Bterm(ftable);
496 }
497
498 /*
499  * copies string q into p, returning next free char ptr
500  */
501 char*
502 chcopy(char* p, char* q)
503 {
504         int c;
505
506         while(c = *q) {
507                 if(c == '"')
508                         *p++ = '\\';
509                 *p++ = c;
510                 q++;
511         }
512         *p = 0;
513         return p;
514 }
515
516 /*
517  * creates output string for item pointed to by pp
518  */
519 char*
520 writem(int *pp)
521 {
522         int i,*p;
523         static char sarr[ISIZE];
524         char* q;
525
526         for(p=pp; *p>0; p++)
527                 ;
528         p = prdptr[-*p];
529         q = chcopy(sarr, nontrst[*p-NTBASE].name);
530         q = chcopy(q, ": ");
531         for(;;) {
532                 *q = ' ';
533                 p++;
534                 if(p == pp)
535                         *q = '.';
536                 q++;
537                 *q = '\0';
538                 i = *p;
539                 if(i <= 0)
540                         break;
541                 q = chcopy(q, symnam(i));
542                 if(q > &sarr[ISIZE-30])
543                         error("item too big");
544         }
545
546         /* an item calling for a reduction */
547         i = *pp;
548         if(i < 0 ) {
549                 q = chcopy(q, "    (");
550                 sprint(q, "%d)", -i);
551         }
552         return sarr;
553 }
554
555 /*
556  * return a pointer to the name of symbol i
557  */
558 char*
559 symnam(int i)
560 {
561         char* cp;
562
563         cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
564         if(*cp == ' ')
565                 cp++;
566         return cp;
567 }
568
569 /*
570  * output the summary on y.output
571  */
572 void
573 summary(void)
574 {
575
576         if(foutput != 0) {
577                 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
578                         ntokens, NTERMS, nnonter, NNONTERM);
579                 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
580                         nprod, NPROD, nstate, NSTATES);
581                 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
582                         zzsrconf, zzrrconf);
583                 Bprint(foutput, "%d/%d working sets used\n",
584                         (int)(zzcwp-wsets), WSETSIZE);
585                 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
586                         (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
587                 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
588                 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
589                 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
590                 Bprint(foutput, "%d goto entries\n", zzgoent);
591                 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
592         }
593         if(zzsrconf != 0 || zzrrconf != 0) {
594                 print("\nconflicts: ");
595                 if(zzsrconf)
596                         print("%d shift/reduce", zzsrconf);
597                 if(zzsrconf && zzrrconf)
598                         print(", ");
599                 if(zzrrconf)
600                         print("%d reduce/reduce", zzrrconf);
601                 print("\n");
602         }
603         if(ftemp != 0) {
604                 Bterm(ftemp);
605                 ftemp = 0;
606         }
607         if(fdefine != 0) {
608                 Bterm(fdefine);
609                 fdefine = 0;
610         }
611 }
612
613 /*
614  * write out error comment -- NEEDS WORK
615  */
616 void
617 error(char *s, ...)
618 {
619
620         nerrors++;
621         fprint(2, "\n fatal error:");
622         fprint(2, s, (&s)[1]);
623         fprint(2, ", %s:%d\n", infile, lineno);
624         if(!fatfl)
625                 return;
626         summary();
627         cleantmp();
628         exits("error");
629 }
630
631 /*
632  * set elements 0 through n-1 to c
633  */
634 void
635 aryfil(int *v, int n, int c)
636 {
637         int i;
638
639         for(i=0; i<n; i++)
640                 v[i] = c;
641 }
642
643 /*
644  * set a to the union of a and b
645  * return 1 if b is not a subset of a, 0 otherwise
646  */
647 int
648 setunion(int *a, int *b)
649 {
650         int i, x, sub;
651
652         sub = 0;
653         SETLOOP(i) {
654                 x = *a;
655                 *a |= *b;
656                 if(*a != x)
657                         sub = 1;
658                 a++;
659                 b++;
660         }
661         return sub;
662 }
663
664 void
665 prlook(Lkset* p)
666 {
667         int j, *pp;
668
669         pp = p->lset;
670         if(pp == 0)
671                 Bprint(foutput, "\tNULL");
672         else {
673                 Bprint(foutput, " { ");
674                 TLOOP(j)
675                         if(BIT(pp,j))
676                                 Bprint(foutput, "%s ", symnam(j));
677                 Bprint(foutput, "}");
678         }
679 }
680
681 /*
682  * compute an array with the beginnings of  productions yielding given nonterminals
683  * The array pres points to these lists
684  * the array pyield has the lists: the total size is only NPROD+1
685  */
686 void
687 cpres(void)
688 {
689         int c, j, i, **pmem;
690         static int *pyield[NPROD];
691
692         pmem = pyield;
693         NTLOOP(i) {
694                 c = i+NTBASE;
695                 pres[i] = pmem;
696                 fatfl = 0;      /* make undefined  symbols  nonfatal */
697                 PLOOP(0, j)
698                         if(*prdptr[j] == c)
699                                 *pmem++ =  prdptr[j]+1;
700                 if(pres[i] == pmem)
701                         error("nonterminal %s not defined!", nontrst[i].name);
702         }
703         pres[i] = pmem;
704         fatfl = 1;
705         if(nerrors) {
706                 summary();
707                 cleantmp();
708                 exits("error");
709         }
710         if(pmem != &pyield[nprod])
711                 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
712 }
713
714 /*
715  * compute an array with the first of nonterminals
716  */
717 void
718 cpfir(void)
719 {
720         int *p, **s, i, **t, ch, changes;
721
722         zzcwp = &wsets[nnonter];
723         NTLOOP(i) {
724                 aryfil(wsets[i].ws.lset, tbitset, 0);
725                 t = pres[i+1];
726                 /* initially fill the sets */
727                 for(s=pres[i]; s<t; ++s)
728                         for(p = *s; (ch = *p) > 0; ++p) {
729                                 if(ch < NTBASE) {
730                                         SETBIT(wsets[i].ws.lset, ch);
731                                         break;
732                                 }
733                                 if(!pempty[ch-NTBASE])
734                                         break;
735                         }
736         }
737
738         /* now, reflect transitivity */
739         changes = 1;
740         while(changes) {
741                 changes = 0;
742                 NTLOOP(i) {
743                         t = pres[i+1];
744                         for(s = pres[i]; s < t; ++s)
745                                 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
746                                         changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
747                                         if(!pempty[ch])
748                                                 break;
749                                 }
750                 }
751         }
752
753         NTLOOP(i)
754                 pfirst[i] = flset(&wsets[i].ws);
755         if(!indebug)
756                 return;
757         if(foutput != 0)
758                 NTLOOP(i) {
759                         Bprint(foutput, "\n%s: ", nontrst[i].name);
760                         prlook(pfirst[i]);
761                         Bprint(foutput, " %d\n", pempty[i]);
762                 }
763 }
764
765 /*
766  * sorts last state,and sees if it equals earlier ones. returns state number
767  */
768 int
769 state(int c)
770 {
771         Item *p1, *p2, *k, *l, *q1, *q2;
772         int size1, size2, i;
773
774         p1 = pstate[nstate];
775         p2 = pstate[nstate+1];
776         if(p1 == p2)
777                 return 0;       /* null state */
778         /* sort the items */
779         for(k = p2-1; k > p1; k--)      /* make k the biggest */
780                 for(l = k-1; l >= p1; --l)
781                         if(l->pitem > k->pitem) {
782                                 int *s;
783                                 Lkset *ss;
784
785                                 s = k->pitem;
786                                 k->pitem = l->pitem;
787                                 l->pitem = s;
788                                 ss = k->look;
789                                 k->look = l->look;
790                                 l->look = ss;
791                         }
792         size1 = p2 - p1;        /* size of state */
793
794         for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
795                 /* get ith state */
796                 q1 = pstate[i];
797                 q2 = pstate[i+1];
798                 size2 = q2 - q1;
799                 if(size1 != size2)
800                         continue;
801                 k = p1;
802                 for(l = q1; l < q2; l++) {
803                         if(l->pitem != k->pitem)
804                                 break;
805                         k++;
806                 }
807                 if(l != q2)
808                         continue;
809                 /* found it */
810                 pstate[nstate+1] = pstate[nstate];      /* delete last state */
811                 /* fix up lookaheads */
812                 if(nolook)
813                         return i;
814                 for(l = q1, k = p1; l < q2; ++l, ++k ) {
815                         int s;
816
817                         SETLOOP(s)
818                                 clset.lset[s] = l->look->lset[s];
819                         if(setunion(clset.lset, k->look->lset)) {
820                                 tystate[i] = MUSTDO;
821                                 /* register the new set */
822                                 l->look = flset( &clset );
823                         }
824                 }
825                 return i;
826         }
827         /* state is new */
828         if(nolook)
829                 error("yacc state/nolook error");
830         pstate[nstate+2] = p2;
831         if(nstate+1 >= NSTATES)
832                 error("too many states");
833         if(c >= NTBASE) {
834                 mstates[nstate] = ntstates[c-NTBASE];
835                 ntstates[c-NTBASE] = nstate;
836         } else {
837                 mstates[nstate] = tstates[c];
838                 tstates[c] = nstate;
839         }
840         tystate[nstate] = MUSTDO;
841         return nstate++;
842 }
843
844 void
845 putitem(int *ptr, Lkset *lptr)
846 {
847         Item *j;
848
849         if(pidebug && foutput != 0)
850                 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
851         j = pstate[nstate+1];
852         j->pitem = ptr;
853         if(!nolook)
854                 j->look = flset(lptr);
855         pstate[nstate+1] = ++j;
856         if((int*)j > zzmemsz) {
857                 zzmemsz = (int*)j;
858                 if(zzmemsz >=  &mem0[MEMSIZE])
859                         error("out of state space");
860         }
861 }
862
863 /*
864  * mark nonterminals which derive the empty string
865  * also, look for nonterminals which don't derive any token strings
866  */
867 void
868 cempty(void)
869 {
870
871         int i, *p;
872
873         /* first, use the array pempty to detect productions that can never be reduced */
874         /* set pempty to WHONOWS */
875         aryfil(pempty, nnonter+1, WHOKNOWS);
876
877         /* now, look at productions, marking nonterminals which derive something */
878 more:
879         PLOOP(0, i) {
880                 if(pempty[*prdptr[i] - NTBASE])
881                         continue;
882                 for(p = prdptr[i]+1; *p >= 0; ++p)
883                         if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
884                                 break;
885                 /* production can be derived */
886                 if(*p < 0) {
887                         pempty[*prdptr[i]-NTBASE] = OK;
888                         goto more;
889                 }
890         }
891
892         /* now, look at the nonterminals, to see if they are all OK */
893         NTLOOP(i) {
894                 /* the added production rises or falls as the start symbol ... */
895                 if(i == 0)
896                         continue;
897                 if(pempty[i] != OK) {
898                         fatfl = 0;
899                         error("nonterminal %s never derives any token string", nontrst[i].name);
900                 }
901         }
902
903         if(nerrors) {
904                 summary();
905                 cleantmp();
906                 exits("error");
907         }
908
909         /* now, compute the pempty array, to see which nonterminals derive the empty string */
910         /* set pempty to WHOKNOWS */
911         aryfil( pempty, nnonter+1, WHOKNOWS);
912
913         /* loop as long as we keep finding empty nonterminals */
914
915 again:
916         PLOOP(1, i) {
917                 /* not known to be empty */
918                 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
919                         for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
920                                 ;
921                         /* we have a nontrivially empty nonterminal */
922                         if(*p < 0) {
923                                 pempty[*prdptr[i]-NTBASE] = EMPTY;
924                                 /* got one ... try for another */
925                                 goto again;
926                         }
927                 }
928         }
929 }
930
931 /*
932  * generate the states
933  */
934 void
935 stagen(void)
936 {
937
938         int c, i, j, more;
939         Wset *p, *q;
940
941         /* initialize */
942         nstate = 0;
943
944         /* THIS IS FUNNY from the standpoint of portability
945          * it represents the magic moment when the mem0 array, which has
946          * been holding the productions, starts to hold item pointers, of a
947          * different type...
948          * someday, alloc should be used to allocate all this stuff... for now, we
949          * accept that if pointers don't fit in integers, there is a problem...
950          */
951
952         pstate[0] = pstate[1] = (Item*)mem;
953         aryfil(clset.lset, tbitset, 0);
954         putitem(prdptr[0]+1, &clset);
955         tystate[0] = MUSTDO;
956         nstate = 1;
957         pstate[2] = pstate[1];
958
959         aryfil(amem, ACTSIZE, 0);
960
961         /* now, the main state generation loop */
962         for(more=1; more;) {
963                 more = 0;
964                 SLOOP(i) {
965                         if(tystate[i] != MUSTDO)
966                                 continue;
967                         tystate[i] = DONE;
968                         aryfil(temp1, nnonter+1, 0);
969                         /* take state i, close it, and do gotos */
970                         closure(i);
971                         /* generate goto's */
972                         WSLOOP(wsets, p) {
973                                 if(p->flag)
974                                         continue;
975                                 p->flag = 1;
976                                 c = *(p->pitem);
977                                 if(c <= 1) {
978                                         if(pstate[i+1]-pstate[i] <= p-wsets)
979                                                 tystate[i] = MUSTLOOKAHEAD;
980                                         continue;
981                                 }
982                                 /* do a goto on c */
983                                 WSLOOP(p, q)
984                                         /* this item contributes to the goto */
985                                         if(c == *(q->pitem)) {
986                                                 putitem(q->pitem+1, &q->ws);
987                                                 q->flag = 1;
988                                         }
989                                 if(c < NTBASE)
990                                         state(c);       /* register new state */
991                                 else
992                                         temp1[c-NTBASE] = state(c);
993                         }
994                         if(gsdebug && foutput != 0) {
995                                 Bprint(foutput, "%d: ", i);
996                                 NTLOOP(j)
997                                         if(temp1[j])
998                                                 Bprint(foutput, "%s %d, ",
999                                                 nontrst[j].name, temp1[j]);
1000                                 Bprint(foutput, "\n");
1001                         }
1002                         indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1003                         /* do some more */
1004                         more = 1;
1005                 }
1006         }
1007 }
1008
1009 /*
1010  * generate the closure of state i
1011  */
1012 void
1013 closure(int i)
1014 {
1015
1016         Wset *u, *v;
1017         Item *p, *q;
1018         int c, ch, work, k, *pi, **s, **t;
1019
1020         zzclose++;
1021
1022         /* first, copy kernel of state i to wsets */
1023         cwp = wsets;
1024         ITMLOOP(i, p, q) {
1025                 cwp->pitem = p->pitem;
1026                 cwp->flag = 1;                  /* this item must get closed */
1027                 SETLOOP(k)
1028                         cwp->ws.lset[k] = p->look->lset[k];
1029                 WSBUMP(cwp);
1030         }
1031
1032         /* now, go through the loop, closing each item */
1033         work = 1;
1034         while(work) {
1035                 work = 0;
1036                 WSLOOP(wsets, u) {
1037                         if(u->flag == 0)
1038                                 continue;
1039                         /* dot is before c */
1040                         c = *(u->pitem);
1041                         if(c < NTBASE) {
1042                                 u->flag = 0;
1043                                 /* only interesting case is where . is before nonterminal */
1044                                 continue;
1045                         }
1046
1047                         /* compute the lookahead */
1048                         aryfil(clset.lset, tbitset, 0);
1049
1050                         /* find items involving c */
1051                         WSLOOP(u, v)
1052                                 if(v->flag == 1 && *(pi=v->pitem) == c) {
1053                                         v->flag = 0;
1054                                         if(nolook)
1055                                                 continue;
1056                                         while((ch = *++pi) > 0) {
1057                                                 /* terminal symbol */
1058                                                 if(ch < NTBASE) {
1059                                                         SETBIT(clset.lset, ch);
1060                                                         break;
1061                                                 }
1062                                                 /* nonterminal symbol */
1063                                                 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1064                                                 if(!pempty[ch-NTBASE])
1065                                                         break;
1066                                         }
1067                                         if(ch <= 0)
1068                                                 setunion(clset.lset, v->ws.lset);
1069                                 }
1070
1071                         /*
1072                          * now loop over productions derived from c
1073                          * c is now nonterminal number
1074                          */
1075                         c -= NTBASE;
1076                         t = pres[c+1];
1077                         for(s = pres[c]; s < t; ++s) {
1078                                 /*
1079                                  * put these items into the closure
1080                                  * is the item there
1081                                  */
1082                                 WSLOOP(wsets, v)
1083                                         /* yes, it is there */
1084                                         if(v->pitem == *s) {
1085                                                 if(nolook)
1086                                                         goto nexts;
1087                                                 if(setunion(v->ws.lset, clset.lset))
1088                                                         v->flag = work = 1;
1089                                                 goto nexts;
1090                                         }
1091
1092                                 /*  not there; make a new entry */
1093                                 if(cwp-wsets+1 >= WSETSIZE)
1094                                         error( "working set overflow");
1095                                 cwp->pitem = *s;
1096                                 cwp->flag = 1;
1097                                 if(!nolook) {
1098                                         work = 1;
1099                                         SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1100                                 }
1101                                 WSBUMP(cwp);
1102
1103                         nexts:;
1104                         }
1105                 }
1106         }
1107
1108         /* have computed closure; flags are reset; return */
1109         if(cwp > zzcwp)
1110                 zzcwp = cwp;
1111         if(cldebug && foutput != 0) {
1112                 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1113                 WSLOOP(wsets, u) {
1114                         if(u->flag)
1115                                 Bprint(foutput, "flag set!\n");
1116                         u->flag = 0;
1117                         Bprint(foutput, "\t%s", writem(u->pitem));
1118                         prlook(&u->ws);
1119                         Bprint(foutput, "\n");
1120                 }
1121         }
1122 }
1123
1124 /*
1125  * decide if the lookahead set pointed to by p is known
1126  * return pointer to a perminent location for the set
1127  */
1128 Lkset*
1129 flset(Lkset *p)
1130 {
1131         Lkset *q;
1132         int *u, *v, *w, j;
1133
1134         for(q = &lkst[nlset]; q-- > lkst;) {
1135                 u = p->lset;
1136                 v = q->lset;
1137                 w = &v[tbitset];
1138                 while(v < w)
1139                         if(*u++ != *v++)
1140                                 goto more;
1141                 /* we have matched */
1142                 return q;
1143         more:;
1144         }
1145         /* add a new one */
1146         q = &lkst[nlset++];
1147         if(nlset >= LSETSIZE)
1148                 error("too many lookahead sets");
1149         SETLOOP(j)
1150                 q->lset[j] = p->lset[j];
1151         return q;
1152 }
1153
1154 void
1155 cleantmp(void)
1156 {
1157         ZAPFILE(actname);
1158         ZAPFILE(tempname);
1159 }
1160
1161 void
1162 intr(void)
1163 {
1164         cleantmp();
1165         exits("interrupted");
1166 }
1167
1168 void
1169 usage(void)
1170 {
1171         fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1172         exits("usage");
1173 }
1174
1175 void
1176 setup(int argc, char *argv[])
1177 {
1178         long c, t;
1179         int i, j, lev, ty, ytab, *p;
1180         int vflag, dflag, stem;
1181         char actnm[8], *stemc, *s, dirbuf[128];
1182
1183         ytab = 0;
1184         vflag = 0;
1185         dflag = 0;
1186         stem = 0;
1187         stemc = "y";
1188         foutput = 0;
1189         fdefine = 0;
1190         fdebug = 0;
1191         ARGBEGIN{
1192         case 'v':
1193         case 'V':
1194                 vflag++;
1195                 break;
1196         case 'D':
1197                 yydebug = EARGF(usage());
1198                 break;
1199         case 'd':
1200                 dflag++;
1201                 break;
1202         case 'o':
1203                 ytab++;
1204                 ytabc = EARGF(usage());
1205                 break;
1206         case 's':
1207                 stem++;
1208                 stemc = ARGF();
1209                 break;
1210         case 'S':
1211                 parser = PARSERS;
1212                 break;
1213         default:
1214                 error("illegal option: %c", ARGC());
1215         }ARGEND
1216         openup(stemc, dflag, vflag, ytab, ytabc);
1217
1218         ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1219         faction = Bopen(actname = mktemp(tactname), OWRITE);
1220         if(ftemp == 0 || faction == 0)
1221                 error("cannot open temp file");
1222         Blethal(ftemp, nil);
1223         Blethal(faction, nil);
1224         if(argc < 1)
1225                 error("no input file");
1226         infile = argv[0];
1227         if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1228                 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1229                 s = malloc(i);
1230                 if(s != nil){
1231                         snprint(s, i, "%s/%s", dirbuf, infile);
1232                         cleanname(s);
1233                         infile = s;
1234                 }
1235         }
1236         finput = Bopen(infile, OREAD);
1237         if(finput == 0)
1238                 error("cannot open '%s'", argv[0]);
1239         Blethal(finput, nil);
1240         cnamp = cnames;
1241
1242         defin(0, "$end");
1243         extval = PRIVATE;       /* tokens start in unicode 'private use' */
1244         defin(0, "error");
1245         defin(1, "$accept");
1246         defin(0, "$unk");
1247         mem = mem0;
1248         i = 0;
1249
1250         for(t = gettok(); t != MARK && t != ENDFILE;)
1251         switch(t) {
1252         case ';':
1253                 t = gettok();
1254                 break;
1255
1256         case START:
1257                 if(gettok() != IDENTIFIER)
1258                         error("bad %%start construction");
1259                 start = chfind(1, tokname);
1260                 t = gettok();
1261                 continue;
1262
1263         case TYPEDEF:
1264                 if(gettok() != TYPENAME)
1265                         error("bad syntax in %%type");
1266                 ty = numbval;
1267                 for(;;) {
1268                         t = gettok();
1269                         switch(t) {
1270                         case IDENTIFIER:
1271                                 if((t=chfind(1, tokname)) < NTBASE) {
1272                                         j = TYPE(toklev[t]);
1273                                         if(j != 0 && j != ty)
1274                                                 error("type redeclaration of token %s",
1275                                                         tokset[t].name);
1276                                         else
1277                                                 SETTYPE(toklev[t], ty);
1278                                 } else {
1279                                         j = nontrst[t-NTBASE].value;
1280                                         if(j != 0 && j != ty)
1281                                                 error("type redeclaration of nonterminal %s",
1282                                                         nontrst[t-NTBASE].name );
1283                                         else
1284                                                 nontrst[t-NTBASE].value = ty;
1285                                 }
1286                         case ',':
1287                                 continue;
1288                         case ';':
1289                                 t = gettok();
1290                         default:
1291                                 break;
1292                         }
1293                         break;
1294                 }
1295                 continue;
1296
1297         case UNION:
1298                 /* copy the union declaration to the output */
1299                 cpyunion();
1300                 t = gettok();
1301                 continue;
1302
1303         case LEFT:
1304         case BINARY:
1305         case RIGHT:
1306                 i++;
1307
1308         case TERM:
1309                 /* nonzero means new prec. and assoc. */
1310                 lev = t-TERM;
1311                 ty = 0;
1312
1313                 /* get identifiers so defined */
1314                 t = gettok();
1315
1316                 /* there is a type defined */
1317                 if(t == TYPENAME) {
1318                         ty = numbval;
1319                         t = gettok();
1320                 }
1321                 for(;;) {
1322                         switch(t) {
1323                         case ',':
1324                                 t = gettok();
1325                                 continue;
1326
1327                         case ';':
1328                                 break;
1329
1330                         case IDENTIFIER:
1331                                 j = chfind(0, tokname);
1332                                 if(j >= NTBASE)
1333                                         error("%s defined earlier as nonterminal", tokname);
1334                                 if(lev) {
1335                                         if(ASSOC(toklev[j]))
1336                                                 error("redeclaration of precedence of %s", tokname);
1337                                         SETASC(toklev[j], lev);
1338                                         SETPLEV(toklev[j], i);
1339                                 }
1340                                 if(ty) {
1341                                         if(TYPE(toklev[j]))
1342                                                 error("redeclaration of type of %s", tokname);
1343                                         SETTYPE(toklev[j],ty);
1344                                 }
1345                                 t = gettok();
1346                                 if(t == NUMBER) {
1347                                         tokset[j].value = numbval;
1348                                         if(j < ndefout && j > 3)
1349                                                 error("please define type number of %s earlier",
1350                                                         tokset[j].name);
1351                                         t = gettok();
1352                                 }
1353                                 continue;
1354                         }
1355                         break;
1356                 }
1357                 continue;
1358
1359         case LCURLY:
1360                 defout(0);
1361                 cpycode();
1362                 t = gettok();
1363                 continue;
1364
1365         default:
1366                 error("syntax error");
1367         }
1368         if(t == ENDFILE)
1369                 error("unexpected EOF before %%");
1370
1371         /* t is MARK */
1372         Bprint(ftable, "extern  int     yyerrflag;\n");
1373         Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1374         Bprint(ftable, "#define YYMAXDEPTH      150\n");
1375         Bprint(ftable, "#endif\n" );
1376         if(!ntypes) {
1377                 Bprint(ftable, "#ifndef YYSTYPE\n");
1378                 Bprint(ftable, "#define YYSTYPE int\n");
1379                 Bprint(ftable, "#endif\n");
1380         }
1381         Bprint(ftable, "YYSTYPE yylval;\n");
1382         Bprint(ftable, "YYSTYPE yyval;\n");
1383
1384         prdptr[0] = mem;
1385
1386         /* added production */
1387         *mem++ = NTBASE;
1388
1389         /* if start is 0, we will overwrite with the lhs of the first rule */
1390         *mem++ = start;
1391         *mem++ = 1;
1392         *mem++ = 0;
1393         prdptr[1] = mem;
1394         while((t=gettok()) == LCURLY)
1395                 cpycode();
1396         if(t != IDENTCOLON)
1397                 error("bad syntax on first rule");
1398
1399         if(!start)
1400                 prdptr[0][1] = chfind(1, tokname);
1401
1402         /* read rules */
1403         while(t != MARK && t != ENDFILE) {
1404                 /* process a rule */
1405                 rlines[nprod] = lineno;
1406                 if(t == '|')
1407                         *mem++ = *prdptr[nprod-1];
1408                 else
1409                         if(t == IDENTCOLON) {
1410                                 *mem = chfind(1, tokname);
1411                                 if(*mem < NTBASE)
1412                                         error("token illegal on LHS of grammar rule");
1413                                 mem++;
1414                         } else
1415                                 error("illegal rule: missing semicolon or | ?");
1416                 /* read rule body */
1417                 t = gettok();
1418
1419         more_rule:
1420                 while(t == IDENTIFIER) {
1421                         *mem = chfind(1, tokname);
1422                         if(*mem < NTBASE)
1423                                 levprd[nprod] = toklev[*mem];
1424                         mem++;
1425                         t = gettok();
1426                 }
1427                 if(t == PREC) {
1428                         if(gettok() != IDENTIFIER)
1429                                 error("illegal %%prec syntax");
1430                         j = chfind(2, tokname);
1431                         if(j >= NTBASE)
1432                                 error("nonterminal %s illegal after %%prec",
1433                                         nontrst[j-NTBASE].name);
1434                         levprd[nprod] = toklev[j];
1435                         t = gettok();
1436                 }
1437                 if(t == '=') {
1438                         levprd[nprod] |= ACTFLAG;
1439                         Bprint(faction, "\ncase %d:", nprod);
1440                         cpyact(mem-prdptr[nprod]-1);
1441                         Bprint(faction, " break;");
1442                         if((t=gettok()) == IDENTIFIER) {
1443
1444                                 /* action within rule... */
1445                                 sprint(actnm, "$$%d", nprod);
1446
1447                                 /* make it a nonterminal */
1448                                 j = chfind(1, actnm);
1449
1450                                 /*
1451                                  * the current rule will become rule number nprod+1
1452                                  * move the contents down, and make room for the null
1453                                  */
1454                                 for(p = mem; p >= prdptr[nprod]; --p)
1455                                         p[2] = *p;
1456                                 mem += 2;
1457
1458                                 /* enter null production for action */
1459                                 p = prdptr[nprod];
1460                                 *p++ = j;
1461                                 *p++ = -nprod;
1462
1463                                 /* update the production information */
1464                                 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1465                                 levprd[nprod] = ACTFLAG;
1466                                 if(++nprod >= NPROD)
1467                                         error("more than %d rules", NPROD);
1468                                 prdptr[nprod] = p;
1469
1470                                 /* make the action appear in the original rule */
1471                                 *mem++ = j;
1472
1473                                 /* get some more of the rule */
1474                                 goto more_rule;
1475                         }
1476                 }
1477
1478                 while(t == ';')
1479                         t = gettok();
1480                 *mem++ = -nprod;
1481
1482                 /* check that default action is reasonable */
1483                 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1484
1485                         /* no explicit action, LHS has value */
1486                         int tempty;
1487
1488                         tempty = prdptr[nprod][1];
1489                         if(tempty < 0)
1490                                 error("must return a value, since LHS has a type");
1491                         else
1492                                 if(tempty >= NTBASE)
1493                                         tempty = nontrst[tempty-NTBASE].value;
1494                                 else
1495                                         tempty = TYPE(toklev[tempty]);
1496                         if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1497                                 error("default action causes potential type clash");
1498                 }
1499                 nprod++;
1500                 if(nprod >= NPROD)
1501                         error("more than %d rules", NPROD);
1502                 prdptr[nprod] = mem;
1503                 levprd[nprod] = 0;
1504         }
1505
1506         /* end of all rules */
1507         defout(1);
1508
1509         finact();
1510         if(t == MARK) {
1511                 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1512                 while((c=Bgetrune(finput)) != Beof)
1513                         Bputrune(ftable, c);
1514         }
1515         Bterm(finput);
1516 }
1517
1518 /*
1519  * finish action routine
1520  */
1521 void
1522 finact(void)
1523 {
1524
1525         Bterm(faction);
1526         Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1527         Bprint(ftable, "#define YYERRCODE %d\n", 2);
1528 }
1529
1530 /*
1531  * define s to be a terminal if t=0
1532  * or a nonterminal if t=1
1533  */
1534 int
1535 defin(int nt, char *s)
1536 {
1537         int val;
1538         Rune rune;
1539
1540         val = 0;
1541         if(nt) {
1542                 nnonter++;
1543                 if(nnonter >= NNONTERM)
1544                         error("too many nonterminals, limit %d",NNONTERM);
1545                 nontrst[nnonter].name = cstash(s);
1546                 return NTBASE + nnonter;
1547         }
1548
1549         /* must be a token */
1550         ntokens++;
1551         if(ntokens >= NTERMS)
1552                 error("too many terminals, limit %d", NTERMS);
1553         tokset[ntokens].name = cstash(s);
1554
1555         /* establish value for token */
1556         /* single character literal */
1557         if(s[0] == ' ') {
1558                 val = chartorune(&rune, &s[1]);
1559                 if(s[val+1] == 0) {
1560                         val = rune;
1561                         goto out;
1562                 }
1563         }
1564
1565         /* escape sequence */
1566         if(s[0] == ' ' && s[1] == '\\') {
1567                 if(s[3] == 0) {
1568                         /* single character escape sequence */
1569                         switch(s[2]) {
1570                         case 'n':       val = '\n'; break;
1571                         case 'r':       val = '\r'; break;
1572                         case 'b':       val = '\b'; break;
1573                         case 't':       val = '\t'; break;
1574                         case 'f':       val = '\f'; break;
1575                         case '\'':      val = '\''; break;
1576                         case '"':       val = '"'; break;
1577                         case '\\':      val = '\\'; break;
1578                         default:        error("invalid escape");
1579                         }
1580                         goto out;
1581                 }
1582
1583                 /* \nnn sequence */
1584                 if(s[2] >= '0' && s[2] <= '7') {
1585                         if(s[3] < '0' ||
1586                            s[3] > '7' ||
1587                            s[4] < '0' ||
1588                            s[4] > '7' ||
1589                            s[5] != 0)
1590                                 error("illegal \\nnn construction");
1591                         val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1592                         if(val == 0)
1593                                 error("'\\000' is illegal");
1594                         goto out;
1595                 }
1596                 error("unknown escape");
1597         }
1598         val = extval++;
1599
1600 out:
1601         tokset[ntokens].value = val;
1602         toklev[ntokens] = 0;
1603         return ntokens;
1604 }
1605
1606 /*
1607  * write out the defines (at the end of the declaration section)
1608  */
1609 void
1610 defout(int last)
1611 {
1612         int i, c;
1613         char sar[NAMESIZE+10];
1614
1615         for(i=ndefout; i<=ntokens; i++) {
1616                 /* non-literals */
1617                 c = tokset[i].name[0];
1618                 if(c != ' ' && c != '$') {
1619                         Bprint(ftable, "#define %s      %d\n",
1620                                 tokset[i].name, tokset[i].value);
1621                         if(fdefine)
1622                                 Bprint(fdefine, "#define\t%s\t%d\n",
1623                                         tokset[i].name, tokset[i].value);
1624                 }
1625         }
1626         ndefout = ntokens+1;
1627         if(last && fdebug) {
1628                 Bprint(fdebug, "char*   yytoknames[] =\n{\n");
1629                 TLOOP(i) {
1630                         if(tokset[i].name) {
1631                                 chcopy(sar, tokset[i].name);
1632                                 Bprint(fdebug, "\t\"%s\",\n", sar);
1633                                 continue;
1634                         }
1635                         Bprint(fdebug, "\t0,\n");
1636                 }
1637                 Bprint(fdebug, "};\n");
1638         }
1639 }
1640
1641 char*
1642 cstash(char *s)
1643 {
1644         char *temp;
1645
1646         temp = cnamp;
1647         do {
1648                 if(cnamp >= &cnames[cnamsz])
1649                         error("too many characters in id's and literals");
1650                 else
1651                         *cnamp++ = *s;
1652         } while(*s++);
1653         return temp;
1654 }
1655
1656 long
1657 gettok(void)
1658 {
1659         long c;
1660         Rune rune;
1661         int i, base, match, reserve;
1662         static int peekline;
1663
1664 begin:
1665         reserve = 0;
1666         lineno += peekline;
1667         peekline = 0;
1668         c = Bgetrune(finput);
1669         while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1670                 if(c == '\n')
1671                         lineno++;
1672                 c = Bgetrune(finput);
1673         }
1674
1675         /* skip comment */
1676         if(c == '/') {
1677                 lineno += skipcom();
1678                 goto begin;
1679         }
1680         switch(c) {
1681         case Beof:
1682                 return ENDFILE;
1683
1684         case '{':
1685                 Bungetrune(finput);
1686                 return '=';
1687
1688         case '<':
1689                 /* get, and look up, a type name (union member name) */
1690                 i = 0;
1691                 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1692                         rune = c;
1693                         c = runetochar(&tokname[i], &rune);
1694                         if(i < NAMESIZE)
1695                                 i += c;
1696                 }
1697                 if(c != '>')
1698                         error("unterminated < ... > clause");
1699                 tokname[i] = 0;
1700                 for(i=1; i<=ntypes; i++)
1701                         if(!strcmp(typeset[i], tokname)) {
1702                                 numbval = i;
1703                                 return TYPENAME;
1704                         }
1705                 ntypes++;
1706                 numbval = ntypes;
1707                 typeset[numbval] = cstash(tokname);
1708                 return TYPENAME;
1709
1710         case '"':
1711         case '\'':
1712                 match = c;
1713                 tokname[0] = ' ';
1714                 i = 1;
1715                 for(;;) {
1716                         c = Bgetrune(finput);
1717                         if(c == '\n' || c <= 0)
1718                                 error("illegal or missing ' or \"" );
1719                         if(c == '\\') {
1720                                 tokname[i] = '\\';
1721                                 if(i < NAMESIZE)
1722                                         i++;
1723                                 c = Bgetrune(finput);
1724                         } else
1725                                 if(c == match)
1726                                         break;
1727                         rune = c;
1728                         c = runetochar(&tokname[i], &rune);
1729                         if(i < NAMESIZE)
1730                                 i += c;
1731                 }
1732                 break;
1733
1734         case '%':
1735         case '\\':
1736                 switch(c = Bgetrune(finput)) {
1737                 case '0':       return TERM;
1738                 case '<':       return LEFT;
1739                 case '2':       return BINARY;
1740                 case '>':       return RIGHT;
1741                 case '%':
1742                 case '\\':      return MARK;
1743                 case '=':       return PREC;
1744                 case '{':       return LCURLY;
1745                 default:        reserve = 1;
1746                 }
1747
1748         default:
1749                 /* number */
1750                 if(isdigit(c)) {
1751                         numbval = c-'0';
1752                         base = (c=='0')? 8: 10;
1753                         for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1754                                 numbval = numbval*base + (c-'0');
1755                         Bungetrune(finput);
1756                         return NUMBER;
1757                 }
1758                 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1759                         i = 0;
1760                         while(islower(c) || isupper(c) || isdigit(c) ||
1761                             c == '-' || c=='_' || c=='.' || c=='$') {
1762                                 if(reserve && isupper(c))
1763                                         c += 'a'-'A';
1764                                 rune = c;
1765                                 c = runetochar(&tokname[i], &rune);
1766                                 if(i < NAMESIZE)
1767                                         i += c;
1768                                 c = Bgetrune(finput);
1769                         }
1770                 } else
1771                         return c;
1772                 Bungetrune(finput);
1773         }
1774         tokname[i] = 0;
1775
1776         /* find a reserved word */
1777         if(reserve) {
1778                 for(c=0; resrv[c].name; c++)
1779                         if(strcmp(tokname, resrv[c].name) == 0)
1780                                 return resrv[c].value;
1781                 error("invalid escape, or illegal reserved word: %s", tokname);
1782         }
1783
1784         /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1785         c = Bgetrune(finput);
1786         while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1787                 if(c == '\n')
1788                         peekline++;
1789                 /* look for comments */
1790                 if(c == '/')
1791                         peekline += skipcom();
1792                 c = Bgetrune(finput);
1793         }
1794         if(c == ':')
1795                 return IDENTCOLON;
1796         Bungetrune(finput);
1797         return IDENTIFIER;
1798 }
1799
1800 /*
1801  * determine the type of a symbol
1802  */
1803 int
1804 fdtype(int t)
1805 {
1806         int v;
1807
1808         if(t >= NTBASE)
1809                 v = nontrst[t-NTBASE].value;
1810         else
1811                 v = TYPE(toklev[t]);
1812         if(v <= 0)
1813                 error("must specify type for %s", (t>=NTBASE)?
1814                         nontrst[t-NTBASE].name: tokset[t].name);
1815         return v;
1816 }
1817
1818 int
1819 chfind(int t, char *s)
1820 {
1821         int i;
1822
1823         if(s[0] == ' ')
1824                 t = 0;
1825         TLOOP(i)
1826                 if(!strcmp(s, tokset[i].name))
1827                         return i;
1828         NTLOOP(i)
1829                 if(!strcmp(s, nontrst[i].name))
1830                         return NTBASE+i;
1831
1832         /* cannot find name */
1833         if(t > 1)
1834                 error("%s should have been defined earlier", s);
1835         return defin(t, s);
1836 }
1837
1838 /*
1839  * copy the union declaration to the output, and the define file if present
1840  */
1841 void
1842 cpyunion(void)
1843 {
1844         long c;
1845         int level;
1846
1847         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1848         Bprint(ftable, "typedef union ");
1849         if(fdefine != 0)
1850                 Bprint(fdefine, "\ntypedef union ");
1851
1852         level = 0;
1853         for(;;) {
1854                 if((c=Bgetrune(finput)) == Beof)
1855                         error("EOF encountered while processing %%union");
1856                 Bputrune(ftable, c);
1857                 if(fdefine != 0)
1858                         Bputrune(fdefine, c);
1859                 switch(c) {
1860                 case '\n':
1861                         lineno++;
1862                         break;
1863                 case '{':
1864                         level++;
1865                         break;
1866                 case '}':
1867                         level--;
1868
1869                         /* we are finished copying */
1870                         if(level == 0) {
1871                                 Bprint(ftable, " YYSTYPE;\n");
1872                                 if(fdefine != 0)
1873                                         Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1874                                 return;
1875                         }
1876                 }
1877         }
1878 }
1879
1880 /*
1881  * copies code between \{ and \}
1882  */
1883 void
1884 cpycode(void)
1885 {
1886
1887         long c;
1888
1889         c = Bgetrune(finput);
1890         if(c == '\n') {
1891                 c = Bgetrune(finput);
1892                 lineno++;
1893         }
1894         Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1895         while(c != Beof) {
1896                 if(c == '\\') {
1897                         if((c=Bgetrune(finput)) == '}')
1898                                 return;
1899                         Bputc(ftable, '\\');
1900                 }
1901                 if(c == '%') {
1902                         if((c=Bgetrune(finput)) == '}')
1903                                 return;
1904                         Bputc(ftable, '%');
1905                 }
1906                 Bputrune(ftable, c);
1907                 if(c == '\n')
1908                         lineno++;
1909                 c = Bgetrune(finput);
1910         }
1911         error("eof before %%}");
1912 }
1913
1914 /*
1915  * skip over comments
1916  * skipcom is called after reading a '/'
1917  */
1918 int
1919 skipcom(void)
1920 {
1921         long c;
1922         int i;
1923
1924         /* i is the number of lines skipped */
1925         i = 0;
1926         c = Bgetrune(finput);
1927         if(c == '/'){                   /* C++ //: skip to end of line */
1928                 while((c = Bgetrune(finput)) != Beof)
1929                         if(c == '\n')
1930                                 return 1;
1931         }else if(c == '*'){             /* normal C comment */
1932                 while((c = Bgetrune(finput)) != Beof) {
1933                         while(c == '*')
1934                                 if((c = Bgetrune(finput)) == '/')
1935                                         return i;
1936                         if(c == '\n')
1937                                 i++;
1938                 }
1939         }else
1940                 error("illegal comment");
1941
1942         error("EOF inside comment");
1943         return 0;
1944 }
1945
1946 /*
1947  * copy C action to the next ; or closing }
1948  */
1949 void
1950 cpyact(int offset)
1951 {
1952         long c;
1953         int brac, match, j, s, fnd, tok;
1954
1955         Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1956         brac = 0;
1957
1958 loop:
1959         c = Bgetrune(finput);
1960 swt:
1961         switch(c) {
1962         case ';':
1963                 if(brac == 0) {
1964                         Bputrune(faction, c);
1965                         return;
1966                 }
1967                 goto lcopy;
1968
1969         case '{':
1970                 brac++;
1971                 goto lcopy;
1972
1973         case '$':
1974                 s = 1;
1975                 tok = -1;
1976                 c = Bgetrune(finput);
1977
1978                 /* type description */
1979                 if(c == '<') {
1980                         Bungetrune(finput);
1981                         if(gettok() != TYPENAME)
1982                                 error("bad syntax on $<ident> clause");
1983                         tok = numbval;
1984                         c = Bgetrune(finput);
1985                 }
1986                 if(c == '$') {
1987                         Bprint(faction, "yyval");
1988
1989                         /* put out the proper tag... */
1990                         if(ntypes) {
1991                                 if(tok < 0)
1992                                         tok = fdtype(*prdptr[nprod]);
1993                                 Bprint(faction, ".%s", typeset[tok]);
1994                         }
1995                         goto loop;
1996                 }
1997                 if(c == '-') {
1998                         s = -s;
1999                         c = Bgetrune(finput);
2000                 }
2001                 if(isdigit(c)) {
2002                         j = 0;
2003                         while(isdigit(c)) {
2004                                 j = j*10 + (c-'0');
2005                                 c = Bgetrune(finput);
2006                         }
2007                         Bungetrune(finput);
2008                         j = j*s - offset;
2009                         if(j > 0)
2010                                 error("Illegal use of $%d", j+offset);
2011
2012                 dollar:
2013                         Bprint(faction, "yypt[-%d].yyv", -j);
2014
2015                         /* put out the proper tag */
2016                         if(ntypes) {
2017                                 if(j+offset <= 0 && tok < 0)
2018                                         error("must specify type of $%d", j+offset);
2019                                 if(tok < 0)
2020                                         tok = fdtype(prdptr[nprod][j+offset]);
2021                                 Bprint(faction, ".%s", typeset[tok]);
2022                         }
2023                         goto loop;
2024                 }
2025                 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2026                         int tok; /* tok used oustide for type info */
2027
2028                         /* look for $name */
2029                         Bungetrune(finput);
2030                         if(gettok() != IDENTIFIER)
2031                                 error("$ must be followed by an identifier");
2032                         tok = chfind(2, tokname);
2033                         if((c = Bgetrune(finput)) != '#') {
2034                                 Bungetrune(finput);
2035                                 fnd = -1;
2036                         } else
2037                                 if(gettok() != NUMBER) {
2038                                         error("# must be followed by number");
2039                                         fnd = -1;
2040                                 } else
2041                                         fnd = numbval;
2042                         for(j=1; j<=offset; ++j)
2043                                 if(tok == prdptr[nprod][j]) {
2044                                         if(--fnd <= 0) {
2045                                                 j -= offset;
2046                                                 goto dollar;
2047                                         }
2048                                 }
2049                         error("$name or $name#number not found");
2050                 }
2051                 Bputc(faction, '$');
2052                 if(s < 0 )
2053                         Bputc(faction, '-');
2054                 goto swt;
2055
2056         case '}':
2057                 brac--;
2058                 if(brac)
2059                         goto lcopy;
2060                 Bputrune(faction, c);
2061                 return;
2062
2063         case '/':
2064                 /* look for comments */
2065                 Bputrune(faction, c);
2066                 c = Bgetrune(finput);
2067                 if(c != '*')
2068                         goto swt;
2069
2070                 /* it really is a comment */
2071                 Bputrune(faction, c);
2072                 c = Bgetrune(finput);
2073                 while(c >= 0) {
2074                         while(c == '*') {
2075                                 Bputrune(faction, c);
2076                                 if((c=Bgetrune(finput)) == '/')
2077                                         goto lcopy;
2078                         }
2079                         Bputrune(faction, c);
2080                         if(c == '\n')
2081                                 lineno++;
2082                         c = Bgetrune(finput);
2083                 }
2084                 error("EOF inside comment");
2085
2086         case '\'':
2087                 /* character constant */
2088                 match = '\'';
2089                 goto string;
2090
2091         case '"':
2092                 /* character string */
2093                 match = '"';
2094
2095         string:
2096                 Bputrune(faction, c);
2097                 while(c = Bgetrune(finput)) {
2098                         if(c == '\\') {
2099                                 Bputrune(faction, c);
2100                                 c = Bgetrune(finput);
2101                                 if(c == '\n')
2102                                         lineno++;
2103                         } else
2104                                 if(c == match)
2105                                         goto lcopy;
2106                                 if(c == '\n')
2107                                         error("newline in string or char. const.");
2108                         Bputrune(faction, c);
2109                 }
2110                 error("EOF in string or character constant");
2111
2112         case Beof:
2113                 error("action does not terminate");
2114
2115         case '\n':
2116                 lineno++;
2117                 goto lcopy;
2118         }
2119
2120 lcopy:
2121         Bputrune(faction, c);
2122         goto loop;
2123 }
2124
2125 void
2126 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2127 {
2128         char buf[256];
2129
2130         if(vflag) {
2131                 snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2132                 foutput = Bopen(buf, OWRITE);
2133                 if(foutput == 0)
2134                         error("cannot open %s", buf);
2135                 Blethal(foutput, nil);
2136         }
2137         if(yydebug) {
2138                 snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2139                 if((fdebug = Bopen(buf, OWRITE)) == 0)
2140                         error("can't open %s", buf);
2141         }
2142         if(dflag) {
2143                 snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2144                 fdefine = Bopen(buf, OWRITE);
2145                 if(fdefine == 0)
2146                         error("can't create %s", buf);
2147                 Blethal(fdefine, nil);
2148         }
2149         if(ytab == 0)
2150                 snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2151         else
2152                 strecpy(buf, buf+sizeof buf, ytabc);
2153         ftable = Bopen(buf, OWRITE);
2154         if(ftable == 0)
2155                 error("cannot open table file %s", buf);
2156         Blethal(ftable, nil);
2157 }
2158
2159 /*
2160  * print the output for the states
2161  */
2162 void
2163 output(void)
2164 {
2165         int i, k, c;
2166         Wset *u, *v;
2167
2168         Bprint(ftable, "short   yyexca[] =\n{");
2169         if(fdebug)
2170                 Bprint(fdebug, "char*   yystates[] =\n{\n");
2171
2172         /* output the stuff for state i */
2173         SLOOP(i) {
2174                 nolook = tystate[i]!=MUSTLOOKAHEAD;
2175                 closure(i);
2176
2177                 /* output actions */
2178                 nolook = 1;
2179                 aryfil(temp1, ntokens+nnonter+1, 0);
2180                 WSLOOP(wsets, u) {
2181                         c = *(u->pitem);
2182                         if(c > 1 && c < NTBASE && temp1[c] == 0) {
2183                                 WSLOOP(u, v)
2184                                         if(c == *(v->pitem))
2185                                                 putitem(v->pitem+1, (Lkset*)0);
2186                                 temp1[c] = state(c);
2187                         } else
2188                                 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2189                                         temp1[c+ntokens] = amem[indgo[i]+c];
2190                 }
2191                 if(i == 1)
2192                         temp1[1] = ACCEPTCODE;
2193
2194                 /* now, we have the shifts; look at the reductions */
2195                 lastred = 0;
2196                 WSLOOP(wsets, u) {
2197                         c = *u->pitem;
2198
2199                         /* reduction */
2200                         if(c <= 0) {
2201                                 lastred = -c;
2202                                 TLOOP(k)
2203                                         if(BIT(u->ws.lset, k)) {
2204                                                 if(temp1[k] == 0)
2205                                                         temp1[k] = c;
2206                                                 else
2207                                                 if(temp1[k] < 0) { /* reduce/reduce conflict */
2208                                                         if(foutput)
2209                                                                 Bprint(foutput,
2210                                                                         "\n%d: reduce/reduce conflict"
2211                                                                         " (red'ns %d and %d ) on %s",
2212                                                                         i, -temp1[k], lastred,
2213                                                                         symnam(k));
2214                                                         if(-temp1[k] > lastred)
2215                                                                 temp1[k] = -lastred;
2216                                                         zzrrconf++;
2217                                                 } else
2218                                                         /* potential shift/reduce conflict */
2219                                                         precftn( lastred, k, i );
2220                                         }
2221                                 }
2222                 }
2223                 wract(i);
2224         }
2225
2226         if(fdebug)
2227                 Bprint(fdebug, "};\n");
2228         Bprint(ftable, "};\n");
2229         Bprint(ftable, "#define YYNPROD %d\n", nprod);
2230         Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2231         if(yydebug)
2232                 Bprint(ftable, "#define yydebug %s\n", yydebug);
2233 }
2234
2235 /*
2236  * pack state i from temp1 into amem
2237  */
2238 int
2239 apack(int *p, int n)
2240 {
2241         int *pp, *qq, *rr, off, *q, *r;
2242
2243         /* we don't need to worry about checking because
2244          * we will only look at entries known to be there...
2245          * eliminate leading and trailing 0's
2246          */
2247
2248         q = p+n;
2249         for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2250                 ;
2251         /* no actions */
2252         if(pp > q)
2253                 return 0;
2254         p = pp;
2255
2256         /* now, find a place for the elements from p to q, inclusive */
2257         r = &amem[ACTSIZE-1];
2258         for(rr = amem; rr <= r; rr++, off++) {
2259                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2260                         if(*pp != 0)
2261                                 if(*pp != *qq && *qq != 0)
2262                                         goto nextk;
2263
2264                 /* we have found an acceptable k */
2265                 if(pkdebug && foutput != 0)
2266                         Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2267                 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2268                         if(*pp) {
2269                                 if(qq > r)
2270                                         error("action table overflow");
2271                                 if(qq > memp)
2272                                         memp = qq;
2273                                 *qq = *pp;
2274                         }
2275                 if(pkdebug && foutput != 0)
2276                         for(pp = amem; pp <= memp; pp += 10) {
2277                                 Bprint(foutput, "\t");
2278                                 for(qq = pp; qq <= pp+9; qq++)
2279                                         Bprint(foutput, "%d ", *qq);
2280                                 Bprint(foutput, "\n");
2281                         }
2282                 return(off);
2283         nextk:;
2284         }
2285         error("no space in action table");
2286         return 0;
2287 }
2288
2289 /*
2290  * output the gotos for the nontermninals
2291  */
2292 void
2293 go2out(void)
2294 {
2295         int i, j, k, best, count, cbest, times;
2296
2297         /* mark begining of gotos */
2298         Bprint(ftemp, "$\n");
2299         for(i = 1; i <= nnonter; i++) {
2300                 go2gen(i);
2301
2302                 /* find the best one to make default */
2303                 best = -1;
2304                 times = 0;
2305
2306                 /* is j the most frequent */
2307                 for(j = 0; j <= nstate; j++) {
2308                         if(tystate[j] == 0)
2309                                 continue;
2310                         if(tystate[j] == best)
2311                                 continue;
2312
2313                         /* is tystate[j] the most frequent */
2314                         count = 0;
2315                         cbest = tystate[j];
2316                         for(k = j; k <= nstate; k++)
2317                                 if(tystate[k] == cbest)
2318                                         count++;
2319                         if(count > times) {
2320                                 best = cbest;
2321                                 times = count;
2322                         }
2323                 }
2324
2325                 /* best is now the default entry */
2326                 zzgobest += times-1;
2327                 for(j = 0; j <= nstate; j++)
2328                         if(tystate[j] != 0 && tystate[j] != best) {
2329                                 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2330                                 zzgoent++;
2331                         }
2332
2333                 /* now, the default */
2334                 if(best == -1)
2335                         best = 0;
2336                 zzgoent++;
2337                 Bprint(ftemp, "%d\n", best);
2338         }
2339 }
2340
2341 /*
2342  * output the gotos for nonterminal c
2343  */
2344 void
2345 go2gen(int c)
2346 {
2347         int i, work, cc;
2348         Item *p, *q;
2349
2350
2351         /* first, find nonterminals with gotos on c */
2352         aryfil(temp1, nnonter+1, 0);
2353         temp1[c] = 1;
2354         work = 1;
2355         while(work) {
2356                 work = 0;
2357                 PLOOP(0, i)
2358
2359                         /* cc is a nonterminal */
2360                         if((cc=prdptr[i][1]-NTBASE) >= 0)
2361                                 /* cc has a goto on c */
2362                                 if(temp1[cc] != 0) {
2363
2364                                         /* thus, the left side of production i does too */
2365                                         cc = *prdptr[i]-NTBASE;
2366                                         if(temp1[cc] == 0) {
2367                                                   work = 1;
2368                                                   temp1[cc] = 1;
2369                                         }
2370                                 }
2371         }
2372
2373         /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2374         if(g2debug && foutput != 0) {
2375                 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2376                 NTLOOP(i)
2377                         if(temp1[i])
2378                                 Bprint(foutput, "%s ", nontrst[i].name);
2379                 Bprint(foutput, "\n");
2380         }
2381
2382         /* now, go through and put gotos into tystate */
2383         aryfil(tystate, nstate, 0);
2384         SLOOP(i)
2385                 ITMLOOP(i, p, q)
2386                         if((cc = *p->pitem) >= NTBASE)
2387                                 /* goto on c is possible */
2388                                 if(temp1[cc-NTBASE]) {
2389                                         tystate[i] = amem[indgo[i]+c];
2390                                         break;
2391                                 }
2392 }
2393
2394 /*
2395  * decide a shift/reduce conflict by precedence.
2396  * r is a rule number, t a token number
2397  * the conflict is in state s
2398  * temp1[t] is changed to reflect the action
2399  */
2400 void
2401 precftn(int r, int t, int s)
2402 {
2403         int lp, lt, action;
2404
2405         lp = levprd[r];
2406         lt = toklev[t];
2407         if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2408
2409                 /* conflict */
2410                 if(foutput != 0)
2411                         Bprint(foutput,
2412                                 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2413                                 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2414                 zzsrconf++;
2415                 return;
2416         }
2417         if(PLEVEL(lt) == PLEVEL(lp))
2418                 action = ASSOC(lt);
2419         else
2420                 if(PLEVEL(lt) > PLEVEL(lp))
2421                         action = RASC;  /* shift */
2422                 else
2423                         action = LASC;  /* reduce */
2424         switch(action) {
2425         case BASC:  /* error action */
2426                 temp1[t] = ERRCODE;
2427                 break;
2428         case LASC:  /* reduce */
2429                 temp1[t] = -r;
2430                 break;
2431         }
2432 }
2433
2434 /*
2435  * output state i
2436  * temp1 has the actions, lastred the default
2437  */
2438 void
2439 wract(int i)
2440 {
2441         int p, p0, p1, ntimes, tred, count, j, flag;
2442
2443         /* find the best choice for lastred */
2444         lastred = 0;
2445         ntimes = 0;
2446         TLOOP(j) {
2447                 if(temp1[j] >= 0)
2448                         continue;
2449                 if(temp1[j]+lastred == 0)
2450                         continue;
2451                 /* count the number of appearances of temp1[j] */
2452                 count = 0;
2453                 tred = -temp1[j];
2454                 levprd[tred] |= REDFLAG;
2455                 TLOOP(p)
2456                         if(temp1[p]+tred == 0)
2457                                 count++;
2458                 if(count > ntimes) {
2459                         lastred = tred;
2460                         ntimes = count;
2461                 }
2462         }
2463
2464         /*
2465          * for error recovery, arrange that, if there is a shift on the
2466          * error recovery token, `error', that the default be the error action
2467          */
2468         if(temp1[2] > 0)
2469                 lastred = 0;
2470
2471         /* clear out entries in temp1 which equal lastred */
2472         TLOOP(p)
2473                 if(temp1[p]+lastred == 0)
2474                         temp1[p] = 0;
2475
2476         wrstate(i);
2477         defact[i] = lastred;
2478         flag = 0;
2479         TLOOP(p0)
2480                 if((p1=temp1[p0]) != 0) {
2481                         if(p1 < 0) {
2482                                 p1 = -p1;
2483                                 goto exc;
2484                         }
2485                         if(p1 == ACCEPTCODE) {
2486                                 p1 = -1;
2487                                 goto exc;
2488                         }
2489                         if(p1 == ERRCODE) {
2490                                 p1 = 0;
2491                         exc:
2492                                 if(flag++ == 0)
2493                                         Bprint(ftable, "-1, %d,\n", i);
2494                                 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2495                                 zzexcp++;
2496                                 continue;
2497                         }
2498                         Bprint(ftemp, "%d,%d,", p0, p1);
2499                         zzacent++;
2500                 }
2501         if(flag) {
2502                 defact[i] = -2;
2503                 Bprint(ftable, "\t-2, %d,\n", lastred);
2504         }
2505         Bprint(ftemp, "\n");
2506 }
2507
2508 /*
2509  * writes state i
2510  */
2511 void
2512 wrstate(int i)
2513 {
2514         int j0, j1;
2515         Item *pp, *qq;
2516         Wset *u;
2517
2518         if(fdebug) {
2519                 if(lastred) {
2520                         Bprint(fdebug, "        0, /*%d*/\n", i);
2521                 } else {
2522                         Bprint(fdebug, "        \"");
2523                         ITMLOOP(i, pp, qq)
2524                                 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2525                         if(tystate[i] == MUSTLOOKAHEAD)
2526                                 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2527                                         if(*u->pitem < 0)
2528                                                 Bprint(fdebug, "%s\\n", writem(u->pitem));
2529                         Bprint(fdebug, "\", /*%d*/\n", i);
2530                 }
2531         }
2532         if(foutput == 0)
2533                 return;
2534         Bprint(foutput, "\nstate %d\n", i);
2535         ITMLOOP(i, pp, qq)
2536                 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2537         if(tystate[i] == MUSTLOOKAHEAD)
2538                 /* print out empty productions in closure */
2539                 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2540                         if(*u->pitem < 0)
2541                                 Bprint(foutput, "\t%s\n", writem(u->pitem));
2542
2543         /* check for state equal to another */
2544         TLOOP(j0)
2545                 if((j1=temp1[j0]) != 0) {
2546                         Bprint(foutput, "\n\t%s  ", symnam(j0));
2547                         /* shift, error, or accept */
2548                         if(j1 > 0) {
2549                                 if(j1 == ACCEPTCODE)
2550                                         Bprint(foutput,  "accept");
2551                                 else
2552                                         if(j1 == ERRCODE)
2553                                                 Bprint(foutput, "error");
2554                                         else
2555                                                 Bprint(foutput, "shift %d", j1);
2556                         } else
2557                                 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2558                 }
2559
2560         /* output the final production */
2561         if(lastred)
2562                 Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2563                         lastred, rlines[lastred]);
2564         else
2565                 Bprint(foutput, "\n\t.  error\n\n");
2566
2567         /* now, output nonterminal actions */
2568         j1 = ntokens;
2569         for(j0 = 1; j0 <= nnonter; j0++) {
2570                 j1++;
2571                 if(temp1[j1])
2572                         Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2573         }
2574 }
2575
2576 void
2577 warray(char *s, int *v, int n)
2578 {
2579         int i;
2580
2581         Bprint(ftable, "short   %s[] =\n{", s);
2582         for(i=0;;) {
2583                 if(i%10 == 0)
2584                         Bprint(ftable, "\n");
2585                 Bprint(ftable, "%4d", v[i]);
2586                 i++;
2587                 if(i >= n) {
2588                         Bprint(ftable, "\n};\n");
2589                         break;
2590                 }
2591                 Bprint(ftable, ",");
2592         }
2593 }
2594
2595 /*
2596  * in order to free up the mem and amem arrays for the optimizer,
2597  * and still be able to output yyr1, etc., after the sizes of
2598  * the action array is known, we hide the nonterminals
2599  * derived by productions in levprd.
2600  */
2601
2602 void
2603 hideprod(void)
2604 {
2605         int i, j;
2606
2607         j = 0;
2608         levprd[0] = 0;
2609         PLOOP(1, i) {
2610                 if(!(levprd[i] & REDFLAG)) {
2611                         j++;
2612                         if(foutput != 0)
2613                                 Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2614                 }
2615                 levprd[i] = *prdptr[i] - NTBASE;
2616         }
2617         if(j)
2618                 print("%d rules never reduced\n", j);
2619 }
2620
2621 void
2622 callopt(void)
2623 {
2624         int i, *p, j, k, *q;
2625
2626         /* read the arrays from tempfile and set parameters */
2627         finput = Bopen(tempname, OREAD);
2628         if(finput == 0)
2629                 error("optimizer cannot open tempfile");
2630         Blethal(finput, nil);
2631
2632         pgo[0] = 0;
2633         temp1[0] = 0;
2634         nstate = 0;
2635         nnonter = 0;
2636         for(;;) {
2637                 switch(gtnm()) {
2638                 case '\n':
2639                         nstate++;
2640                         pmem--;
2641                         temp1[nstate] = pmem - mem0;
2642                 case ',':
2643                         continue;
2644                 case '$':
2645                         break;
2646                 default:
2647                         error("bad tempfile");
2648                 }
2649                 break;
2650         }
2651
2652         pmem--;
2653         temp1[nstate] = yypgo[0] = pmem - mem0;
2654         for(;;) {
2655                 switch(gtnm()) {
2656                 case '\n':
2657                         nnonter++;
2658                         yypgo[nnonter] = pmem-mem0;
2659                 case ',':
2660                         continue;
2661                 case -1:
2662                         break;
2663                 default:
2664                         error("bad tempfile");
2665                 }
2666                 break;
2667         }
2668         pmem--;
2669         yypgo[nnonter--] = pmem - mem0;
2670         for(i = 0; i < nstate; i++) {
2671                 k = 32000;
2672                 j = 0;
2673                 q = mem0 + temp1[i+1];
2674                 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2675                         if(*p > j)
2676                                 j = *p;
2677                         if(*p < k)
2678                                 k = *p;
2679                 }
2680                 /* nontrivial situation */
2681                 if(k <= j) {
2682                         /* j is now the range */
2683 /*                      j -= k;                 /* call scj */
2684                         if(k > maxoff)
2685                                 maxoff = k;
2686                 }
2687                 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2688                 if(j > maxspr)
2689                         maxspr = j;
2690         }
2691
2692         /* initialize ggreed table */
2693         for(i = 1; i <= nnonter; i++) {
2694                 ggreed[i] = 1;
2695                 j = 0;
2696
2697                 /* minimum entry index is always 0 */
2698                 q = mem0 + yypgo[i+1] - 1;
2699                 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2700                         ggreed[i] += 2;
2701                         if(*p > j)
2702                                 j = *p;
2703                 }
2704                 ggreed[i] = ggreed[i] + 2*j;
2705                 if(j > maxoff)
2706                         maxoff = j;
2707         }
2708
2709         /* now, prepare to put the shift actions into the amem array */
2710         for(i = 0; i < ACTSIZE; i++)
2711                 amem[i] = 0;
2712         maxa = amem;
2713         for(i = 0; i < nstate; i++) {
2714                 if(tystate[i] == 0 && adb > 1)
2715                         Bprint(ftable, "State %d: null\n", i);
2716                 indgo[i] = YYFLAG1;
2717         }
2718         while((i = nxti()) != NOMORE)
2719                 if(i >= 0)
2720                         stin(i);
2721                 else
2722                         gin(-i);
2723
2724         /* print amem array */
2725         if(adb > 2 )
2726                 for(p = amem; p <= maxa; p += 10) {
2727                         Bprint(ftable, "%4d  ", (int)(p-amem));
2728                         for(i = 0; i < 10; ++i)
2729                                 Bprint(ftable, "%4d  ", p[i]);
2730                         Bprint(ftable, "\n");
2731                 }
2732
2733         /* write out the output appropriate to the language */
2734         aoutput();
2735         osummary();
2736         ZAPFILE(tempname);
2737 }
2738
2739 void
2740 gin(int i)
2741 {
2742         int *p, *r, *s, *q1, *q2;
2743
2744         /* enter gotos on nonterminal i into array amem */
2745         ggreed[i] = 0;
2746
2747         q2 = mem0+ yypgo[i+1] - 1;
2748         q1 = mem0 + yypgo[i];
2749
2750         /* now, find amem place for it */
2751         for(p = amem; p < &amem[ACTSIZE]; p++) {
2752                 if(*p)
2753                         continue;
2754                 for(r = q1; r < q2; r += 2) {
2755                         s = p + *r + 1;
2756                         if(*s)
2757                                 goto nextgp;
2758                         if(s > maxa)
2759                                 if((maxa = s) > &amem[ACTSIZE])
2760                                         error("a array overflow");
2761                 }
2762                 /* we have found amem spot */
2763                 *p = *q2;
2764                 if(p > maxa)
2765                         if((maxa = p) > &amem[ACTSIZE])
2766                                 error("a array overflow");
2767                 for(r = q1; r < q2; r += 2) {
2768                         s = p + *r + 1;
2769                         *s = r[1];
2770                 }
2771                 pgo[i] = p-amem;
2772                 if(adb > 1)
2773                         Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2774                 return;
2775
2776         nextgp:;
2777         }
2778         error("cannot place goto %d\n", i);
2779 }
2780
2781 void
2782 stin(int i)
2783 {
2784         int *r, *s, n, flag, j, *q1, *q2;
2785
2786         tystate[i] = 0;
2787
2788         /* enter state i into the amem array */
2789         q2 = mem0+temp1[i+1];
2790         q1 = mem0+temp1[i];
2791         /* find an acceptable place */
2792         for(n = -maxoff; n < ACTSIZE; n++) {
2793                 flag = 0;
2794                 for(r = q1; r < q2; r += 2) {
2795                         if((s = *r + n + amem) < amem)
2796                                 goto nextn;
2797                         if(*s == 0)
2798                                 flag++;
2799                         else
2800                                 if(*s != r[1])
2801                                         goto nextn;
2802                 }
2803
2804                 /* check that the position equals another only if the states are identical */
2805                 for(j=0; j<nstate; j++) {
2806                         if(indgo[j] == n) {
2807
2808                                 /* we have some disagreement */
2809                                 if(flag)
2810                                         goto nextn;
2811                                 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2812
2813                                         /* states are equal */
2814                                         indgo[i] = n;
2815                                         if(adb > 1)
2816                                                 Bprint(ftable,
2817                                                 "State %d: entry at %d equals state %d\n",
2818                                                 i, n, j);
2819                                         return;
2820                                 }
2821
2822                                 /* we have some disagreement */
2823                                 goto nextn;
2824                         }
2825                 }
2826
2827                 for(r = q1; r < q2; r += 2) {
2828                         if((s = *r+n+amem) >= &amem[ACTSIZE])
2829                                 error("out of space in optimizer a array");
2830                         if(s > maxa)
2831                                 maxa = s;
2832                         if(*s != 0 && *s != r[1])
2833                                 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2834                         *s = r[1];
2835                 }
2836                 indgo[i] = n;
2837                 if(adb > 1)
2838                         Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2839                 return;
2840         nextn:;
2841         }
2842         error("Error; failure to place state %d\n", i);
2843 }
2844
2845 /*
2846  * finds the next i
2847  */
2848 int
2849 nxti(void)
2850 {
2851         int i, max, maxi;
2852
2853         max = 0;
2854         maxi = 0;
2855         for(i = 1; i <= nnonter; i++)
2856                 if(ggreed[i] >= max) {
2857                         max = ggreed[i];
2858                         maxi = -i;
2859                 }
2860         for(i = 0; i < nstate; ++i)
2861                 if(tystate[i] >= max) {
2862                         max = tystate[i];
2863                         maxi = i;
2864                 }
2865         if(nxdb)
2866                 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2867         if(max == 0)
2868                 return NOMORE;
2869         return maxi;
2870 }
2871
2872 /*
2873  * write summary
2874  */
2875 void
2876 osummary(void)
2877 {
2878
2879         int i, *p;
2880
2881         if(foutput == 0)
2882                 return;
2883         i = 0;
2884         for(p = maxa; p >= amem; p--)
2885                 if(*p == 0)
2886                         i++;
2887
2888         Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2889                 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2890         Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2891         Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2892 }
2893
2894 /*
2895  * this version is for C
2896  * write out the optimized parser
2897  */
2898 void
2899 aoutput(void)
2900 {
2901         Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2902         arout("yyact", amem, (maxa-amem)+1);
2903         arout("yypact", indgo, nstate);
2904         arout("yypgo", pgo, nnonter+1);
2905 }
2906
2907 void
2908 arout(char *s, int *v, int n)
2909 {
2910         int i;
2911
2912         Bprint(ftable, "short   %s[] =\n{", s);
2913         for(i = 0; i < n;) {
2914                 if(i%10 == 0)
2915                         Bprint(ftable, "\n");
2916                 Bprint(ftable, "%4d", v[i]);
2917                 i++;
2918                 if(i == n)
2919                         Bprint(ftable, "\n};\n");
2920                 else
2921                         Bprint(ftable, ",");
2922         }
2923 }
2924
2925 /*
2926  * read and convert an integer from the standard input
2927  * return the terminating character
2928  * blanks, tabs, and newlines are ignored
2929  */
2930 int
2931 gtnm(void)
2932 {
2933         int sign, val, c;
2934
2935         sign = 0;
2936         val = 0;
2937         while((c=Bgetrune(finput)) != Beof) {
2938                 if(isdigit(c)) {
2939                         val = val*10 + c-'0';
2940                         continue;
2941                 }
2942                 if(c == '-') {
2943                         sign = 1;
2944                         continue;
2945                 }
2946                 break;
2947         }
2948         if(sign)
2949                 val = -val;
2950         *pmem++ = val;
2951         if(pmem >= &mem0[MEMSIZE])
2952                 error("out of space");
2953         return c;
2954 }