]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/lex/parser.y
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / cmd / lex / parser.y
1 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
2 %left SCON '/' NEWE
3 %left '|'
4 %left '$' '^'
5 %left CHAR CCL NCCL '(' '.' STR NULLS
6 %left ITER
7 %left CAT
8 %left '*' '+' '?'
9
10 %{
11 # include "ldefs.h"
12 #define YYSTYPE union _yystype_
13 union _yystype_
14 {
15         int     i;
16         uchar   *cp;
17 };
18 %}
19 %%
20 %{
21 int i;
22 int j,k;
23 int g;
24 uchar *p;
25 %}
26 acc     :       lexinput
27         ={      
28 # ifdef DEBUG
29                 if(debug) sect2dump();
30 # endif
31         }
32         ;
33 lexinput:       defns delim prods end
34         |       defns delim end
35         ={
36                 if(!funcflag)phead2();
37                 funcflag = TRUE;
38         }
39         | error
40         ={
41 # ifdef DEBUG
42                 if(debug) {
43                         sect1dump();
44                         sect2dump();
45                         }
46 # endif
47                 }
48         ;
49 end:            delim | ;
50 defns:  defns STR STR
51         ={      strcpy((char*)dp,(char*)$2.cp);
52                 def[dptr] = dp;
53                 dp += strlen((char*)$2.cp) + 1;
54                 strcpy((char*)dp,(char*)$3.cp);
55                 subs[dptr++] = dp;
56                 if(dptr >= DEFSIZE)
57                         error("Too many definitions");
58                 dp += strlen((char*)$3.cp) + 1;
59                 if(dp >= dchar+DEFCHAR)
60                         error("Definitions too long");
61                 subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
62         }
63         |
64         ;
65 delim:  DELIM
66         ={
67 # ifdef DEBUG
68                 if(sect == DEFSECTION && debug) sect1dump();
69 # endif
70                 sect++;
71                 }
72         ;
73 prods:  prods pr
74         ={      $$.i = mn2(RNEWE,$1.i,$2.i);
75                 }
76         |       pr
77         ={      $$.i = $1.i;}
78         ;
79 pr:     r NEWE
80         ={
81                 if(divflg == TRUE)
82                         i = mn1(S1FINAL,casecount);
83                 else i = mn1(FINAL,casecount);
84                 $$.i = mn2(RCAT,$1.i,i);
85                 divflg = FALSE;
86                 casecount++;
87                 }
88         | error NEWE
89         ={
90 # ifdef DEBUG
91                 if(debug) sect2dump();
92 # endif
93                 }
94 r:      CHAR
95         ={      $$.i = mn0($1.i); }
96         | STR
97         ={
98                 p = $1.cp;
99                 i = mn0(*p++);
100                 while(*p)
101                         i = mn2(RSTR,i,*p++);
102                 $$.i = i;
103                 }
104         | '.'
105         ={      symbol['\n'] = 0;
106                 if(psave == FALSE){
107                         p = ccptr;
108                         psave = ccptr;
109                         for(i=1;i<'\n';i++){
110                                 symbol[i] = 1;
111                                 *ccptr++ = i;
112                                 }
113                         for(i='\n'+1;i<NCH;i++){
114                                 symbol[i] = 1;
115                                 *ccptr++ = i;
116                                 }
117                         *ccptr++ = 0;
118                         if(ccptr > ccl+CCLSIZE)
119                                 error("Too many large character classes");
120                         }
121                 else
122                         p = psave;
123                 $$.i = mnp(RCCL, p);
124                 cclinter(1);
125                 }
126         | CCL
127         ={      $$.i = mnp(RCCL,$1.cp); }
128         | NCCL
129         ={      $$.i = mnp(RNCCL,$1.cp); }
130         | r '*'
131         ={      $$.i = mn1(STAR,$1.i); }
132         | r '+'
133         ={      $$.i = mn1(PLUS,$1.i); }
134         | r '?'
135         ={      $$.i = mn1(QUEST,$1.i); }
136         | r '|' r
137         ={      $$.i = mn2(BAR,$1.i,$3.i); }
138         | r r %prec CAT
139         ={      $$.i = mn2(RCAT,$1.i,$2.i); }
140         | r '/' r
141         ={      if(!divflg){
142                         j = mn1(S2FINAL,-casecount);
143                         i = mn2(RCAT,$1.i,j);
144                         $$.i = mn2(DIV,i,$3.i);
145                         }
146                 else {
147                         $$.i = mn2(RCAT,$1.i,$3.i);
148                         warning("Extra slash removed");
149                         }
150                 divflg = TRUE;
151                 }
152         | r ITER ',' ITER '}'
153         ={      if($2.i > $4.i){
154                         i = $2.i;
155                         $2.i = $4.i;
156                         $4.i = i;
157                         }
158                 if($4.i <= 0)
159                         warning("Iteration range must be positive");
160                 else {
161                         j = $1.i;
162                         for(k = 2; k<=$2.i;k++)
163                                 j = mn2(RCAT,j,dupl($1.i));
164                         for(i = $2.i+1; i<=$4.i; i++){
165                                 g = dupl($1.i);
166                                 for(k=2;k<=i;k++)
167                                         g = mn2(RCAT,g,dupl($1.i));
168                                 j = mn2(BAR,j,g);
169                                 }
170                         $$.i = j;
171                         }
172         }
173         | r ITER '}'
174         ={
175                 if($2.i < 0)warning("Can't have negative iteration");
176                 else if($2.i == 0) $$.i = mn0(RNULLS);
177                 else {
178                         j = $1.i;
179                         for(k=2;k<=$2.i;k++)
180                                 j = mn2(RCAT,j,dupl($1.i));
181                         $$.i = j;
182                         }
183                 }
184         | r ITER ',' '}'
185         ={
186                                 /* from n to infinity */
187                 if($2.i < 0)warning("Can't have negative iteration");
188                 else if($2.i == 0) $$.i = mn1(STAR,$1.i);
189                 else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
190                 else {          /* >= 2 iterations minimum */
191                         j = $1.i;
192                         for(k=2;k<$2.i;k++)
193                                 j = mn2(RCAT,j,dupl($1.i));
194                         k = mn1(PLUS,dupl($1.i));
195                         $$.i = mn2(RCAT,j,k);
196                         }
197                 }
198         | SCON r
199         ={      $$.i = mn2(RSCON,$2.i,$1.i); }
200         | '^' r
201         ={      $$.i = mn1(CARAT,$2.i); }
202         | r '$'
203         ={      i = mn0('\n');
204                 if(!divflg){
205                         j = mn1(S2FINAL,-casecount);
206                         k = mn2(RCAT,$1.i,j);
207                         $$.i = mn2(DIV,k,i);
208                         }
209                 else $$.i = mn2(RCAT,$1.i,i);
210                 divflg = TRUE;
211                 }
212         | '(' r ')'
213         ={      $$.i = $2.i; }
214         |       NULLS
215         ={      $$.i = mn0(RNULLS); }
216         ;
217 %%
218 int
219 yylex(void)
220 {
221         uchar *p;
222         int c, i;
223         uchar  *t, *xp;
224         int n, j, k, x;
225         static int sectbegin;
226         static uchar token[TOKENSIZE];
227         static int iter;
228
229 # ifdef DEBUG
230         yylval.i = 0;
231         yylval.p = 0;
232 # endif
233
234         if(sect == DEFSECTION) {                /* definitions section */
235                 while(!eof) {
236                         if(prev == '\n'){               /* next char is at beginning of line */
237                                 getl(p=buf);
238                                 switch(*p){
239                                 case '%':
240                                         switch(*(p+1)){
241                                         case '%':
242                                                 lgate();
243                                                 Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
244                                                 Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
245                                                 sectbegin = TRUE;
246                                                 i = treesize*(sizeof(*name)+sizeof(*left)+
247                                                         sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
248                                                 p = myalloc(i,1);
249                                                 if(p == 0)
250                                                         error("Too little core for parse tree");
251                                                 free(p);
252                                                 name = myalloc(treesize,sizeof(*name));
253                                                 left = myalloc(treesize,sizeof(*left));
254                                                 right = myalloc(treesize,sizeof(*right));
255                                                 nullstr = myalloc(treesize,sizeof(*nullstr));
256                                                 parent = myalloc(treesize,sizeof(*parent));
257                                                 ptr = myalloc(treesize,sizeof(*ptr));
258                                                 if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
259                                                         error("Too little core for parse tree");
260                                                 return(freturn(DELIM));
261                                         case 'p': case 'P':     /* has overridden number of positions */
262                                                 while(*p && !isdigit(*p))p++;
263                                                 maxpos = atol((char*)p);
264 # ifdef DEBUG
265                                                 if (debug) print("positions (%%p) now %d\n",maxpos);
266 # endif
267                                                 if(report == 2)report = 1;
268                                                 continue;
269                                         case 'n': case 'N':     /* has overridden number of states */
270                                                 while(*p && !isdigit(*p))p++;
271                                                 nstates = atol((char*)p);
272 # ifdef DEBUG
273                                                 if(debug)print( " no. states (%%n) now %d\n",nstates);
274 # endif
275                                                 if(report == 2)report = 1;
276                                                 continue;
277                                         case 'e': case 'E':             /* has overridden number of tree nodes */
278                                                 while(*p && !isdigit(*p))p++;
279                                                 treesize = atol((char*)p);
280 # ifdef DEBUG
281                                                 if (debug) print("treesize (%%e) now %d\n",treesize);
282 # endif
283                                                 if(report == 2)report = 1;
284                                                 continue;
285                                         case 'o': case 'O':
286                                                 while (*p && !isdigit(*p))p++;
287                                                 outsize = atol((char*)p);
288                                                 if (report ==2) report=1;
289                                                 continue;
290                                         case 'a': case 'A':             /* has overridden number of transitions */
291                                                 while(*p && !isdigit(*p))p++;
292                                                 if(report == 2)report = 1;
293                                                 ntrans = atol((char*)p);
294 # ifdef DEBUG
295                                                 if (debug)print("N. trans (%%a) now %d\n",ntrans);
296 # endif
297                                                 continue;
298                                         case 'k': case 'K': /* overriden packed char classes */
299                                                 while (*p && !isdigit(*p))p++;
300                                                 if (report==2) report=1;
301                                                 free(pchar);
302                                                 pchlen = atol((char*)p);
303 # ifdef DEBUG
304                                                 if (debug) print( "Size classes (%%k) now %d\n",pchlen);
305 # endif
306                                                 pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
307                                                 continue;
308                                         case '{':
309                                                 lgate();
310                                                 while(getl(p) && strcmp((char*)p,"%}") != 0)
311                                                         Bprint(&fout, "%s\n",(char*)p);
312                                                 if(p[0] == '%') continue;
313                                                 error("Premature eof");
314                                         case 's': case 'S':             /* start conditions */
315                                                 lgate();
316                                                 while(*p && strchr(" \t,", *p) == 0) p++;
317                                                 n = TRUE;
318                                                 while(n){
319                                                         while(*p && strchr(" \t,", *p)) p++;
320                                                         t = p;
321                                                         while(*p && strchr(" \t,", *p) == 0)p++;
322                                                         if(!*p) n = FALSE;
323                                                         *p++ = 0;
324                                                         if (*t == 0) continue;
325                                                         i = sptr*2;
326                                                         Bprint(&fout,"#define %s %d\n",(char*)t,i);
327                                                         strcpy((char*)sp, (char*)t);
328                                                         sname[sptr++] = sp;
329                                                         sname[sptr] = 0;        /* required by lookup */
330                                                         if(sptr >= STARTSIZE)
331                                                                 error("Too many start conditions");
332                                                         sp += strlen((char*)sp) + 1;
333                                                         if(sp >= stchar+STARTCHAR)
334                                                                 error("Start conditions too long");
335                                                 }
336                                                 continue;
337                                         default:
338                                                 warning("Invalid request %s",p);
339                                                 continue;
340                                         }       /* end of switch after seeing '%' */
341                                 case ' ': case '\t':            /* must be code */
342                                         lgate();
343                                         Bprint(&fout, "%s\n",(char*)p);
344                                         continue;
345                                 default:                /* definition */
346                                         while(*p && !isspace(*p)) p++;
347                                         if(*p == 0)
348                                                 continue;
349                                         prev = *p;
350                                         *p = 0;
351                                         bptr = p+1;
352                                         yylval.cp = buf;
353                                         if(isdigit(buf[0]))
354                                                 warning("Substitution strings may not begin with digits");
355                                         return(freturn(STR));
356                                 }
357                         }
358                         /* still sect 1, but prev != '\n' */
359                         else {
360                                 p = bptr;
361                                 while(*p && isspace(*p)) p++;
362                                 if(*p == 0)
363                                         warning("No translation given - null string assumed");
364                                 strcpy((char*)token, (char*)p);
365                                 yylval.cp = token;
366                                 prev = '\n';
367                                 return(freturn(STR));
368                         }
369                 }
370                 /* end of section one processing */
371         } else if(sect == RULESECTION){         /* rules and actions */
372                 while(!eof){
373                         switch(c=gch()){
374                         case '\0':
375                                 return(freturn(0));
376                         case '\n':
377                                 if(prev == '\n') continue;
378                                 x = NEWE;
379                                 break;
380                         case ' ':
381                         case '\t':
382                                 if(sectbegin == TRUE){
383                                         cpyact();
384                                         while((c=gch()) && c != '\n');
385                                         continue;
386                                 }
387                                 if(!funcflag)phead2();
388                                 funcflag = TRUE;
389                                 Bprint(&fout,"case %d:\n",casecount);
390                                 if(cpyact())
391                                         Bprint(&fout,"break;\n");
392                                 while((c=gch()) && c != '\n');
393                                 if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
394                                         warning("Executable statements should occur right after %%");
395                                         continue;
396                                 }
397                                 x = NEWE;
398                                 break;
399                         case '%':
400                                 if(prev != '\n') goto character;
401                                 if(peek == '{'){        /* included code */
402                                         getl(buf);
403                                         while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
404                                                 Bprint(&fout,"%s\n",(char*)buf);
405                                         continue;
406                                 }
407                                 if(peek == '%'){
408                                         gch();
409                                         gch();
410                                         x = DELIM;
411                                         break;
412                                 }
413                                 goto character;
414                         case '|':
415                                 if(peek == ' ' || peek == '\t' || peek == '\n'){
416                                         Bprint(&fout,"%d\n",30000+casecount++);
417                                         continue;
418                                 }
419                                 x = '|';
420                                 break;
421                         case '$':
422                                 if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
423                                         x = c;
424                                         break;
425                                 }
426                                 goto character;
427                         case '^':
428                                 if(prev != '\n' && scon != TRUE) goto character;        /* valid only at line begin */
429                                 x = c;
430                                 break;
431                         case '?':
432                         case '+':
433                         case '.':
434                         case '*':
435                         case '(':
436                         case ')':
437                         case ',':
438                         case '/':
439                                 x = c;
440                                 break;
441                         case '}':
442                                 iter = FALSE;
443                                 x = c;
444                                 break;
445                         case '{':       /* either iteration or definition */
446                                 if(isdigit(c=gch())){   /* iteration */
447                                         iter = TRUE;
448                                 ieval:
449                                         i = 0;
450                                         while(isdigit(c)){
451                                                 token[i++] = c;
452                                                 c = gch();
453                                         }
454                                         token[i] = 0;
455                                         yylval.i = atol((char*)token);
456                                         munputc(c);
457                                         x = ITER;
458                                         break;
459                                 } else {                /* definition */
460                                         i = 0;
461                                         while(c && c!='}'){
462                                                 token[i++] = c;
463                                                 c = gch();
464                                         }
465                                         token[i] = 0;
466                                         i = lookup(token,def);
467                                         if(i < 0)
468                                                 warning("Definition %s not found",token);
469                                         else
470                                                 munputs(subs[i]);
471                                         continue;
472                                 }
473                         case '<':               /* start condition ? */
474                                 if(prev != '\n')                /* not at line begin, not start */
475                                         goto character;
476                                 t = slptr;
477                                 do {
478                                         i = 0;
479                                         c = gch();
480                                         while(c != ',' && c && c != '>'){
481                                                 token[i++] = c;
482                                                 c = gch();
483                                         }
484                                         token[i] = 0;
485                                         if(i == 0)
486                                                 goto character;
487                                         i = lookup(token,sname);
488                                         if(i < 0) {
489                                                 warning("Undefined start condition %s",token);
490                                                 continue;
491                                         }
492                                         *slptr++ = i+1;
493                                 } while(c && c != '>');
494                                 *slptr++ = 0;
495                                 /* check if previous value re-usable */
496                                 for (xp=slist; xp<t; ){
497                                         if (strcmp((char*)xp, (char*)t)==0)
498                                                 break;
499                                         while (*xp++);
500                                 }
501                                 if (xp<t){
502                                         /* re-use previous pointer to string */
503                                         slptr=t;
504                                         t=xp;
505                                 }
506                                 if(slptr > slist+STARTSIZE)             /* note not packed ! */
507                                         error("Too many start conditions used");
508                                 yylval.cp = t;
509                                 x = SCON;
510                                 break;
511                         case '"':
512                                 i = 0;
513                                 while((c=gch()) && c != '"' && c != '\n'){
514                                         if(c == '\\') c = usescape(gch());
515                                         token[i++] = c;
516                                         if(i > TOKENSIZE){
517                                                 warning("String too long");
518                                                 i = TOKENSIZE-1;
519                                                 break;
520                                         }
521                                 }
522                                 if(c == '\n') {
523                                         yyline--;
524                                         warning("Non-terminated string");
525                                         yyline++;
526                                 }
527                                 token[i] = 0;
528                                 if(i == 0)x = NULLS;
529                                 else if(i == 1){
530                                         yylval.i = token[0];
531                                         x = CHAR;
532                                 } else {
533                                         yylval.cp = token;
534                                         x = STR;
535                                 }
536                                 break;
537                         case '[':
538                                 for(i=1;i<NCH;i++) symbol[i] = 0;
539                                 x = CCL;
540                                 if((c = gch()) == '^'){
541                                         x = NCCL;
542                                         c = gch();
543                                 }
544                                 while(c != ']' && c){
545                                         if(c == '\\') c = usescape(gch());
546                                         symbol[c] = 1;
547                                         j = c;
548                                         if((c=gch()) == '-' && peek != ']'){            /* range specified */
549                                                 c = gch();
550                                                 if(c == '\\') c = usescape(gch());
551                                                 k = c;
552                                                 if(j > k) {
553                                                         n = j;
554                                                         j = k;
555                                                         k = n;
556                                                 }
557                                                 if(!(('A' <= j && k <= 'Z') ||
558                                                      ('a' <= j && k <= 'z') ||
559                                                      ('0' <= j && k <= '9')))
560                                                         warning("Non-portable Character Class");
561                                                 for(n=j+1;n<=k;n++)
562                                                         symbol[n] = 1;          /* implementation dependent */
563                                                 c = gch();
564                                         }
565                                 }
566                                 /* try to pack ccl's */
567                                 i = 0;
568                                 for(j=0;j<NCH;j++)
569                                         if(symbol[j])token[i++] = j;
570                                 token[i] = 0;
571                                 p = ccl;
572                                 while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
573                                 if(p < ccptr)   /* found it */
574                                         yylval.cp = p;
575                                 else {
576                                         yylval.cp = ccptr;
577                                         strcpy((char*)ccptr,(char*)token);
578                                         ccptr += strlen((char*)token) + 1;
579                                         if(ccptr >= ccl+CCLSIZE)
580                                                 error("Too many large character classes");
581                                 }
582                                 cclinter(x==CCL);
583                                 break;
584                         case '\\':
585                                 c = usescape(gch());
586                         default:
587                         character:
588                                 if(iter){       /* second part of an iteration */
589                                         iter = FALSE;
590                                         if('0' <= c && c <= '9')
591                                                 goto ieval;
592                                 }
593                                 if(isalpha(peek)){
594                                         i = 0;
595                                         yylval.cp = token;
596                                         token[i++] = c;
597                                         while(isalpha(peek))
598                                                 token[i++] = gch();
599                                         if(peek == '?' || peek == '*' || peek == '+')
600                                                 munputc(token[--i]);
601                                         token[i] = 0;
602                                         if(i == 1){
603                                                 yylval.i = token[0];
604                                                 x = CHAR;
605                                         }
606                                         else x = STR;
607                                 } else {
608                                         yylval.i = c;
609                                         x = CHAR;
610                                 }
611                         }
612                         scon = FALSE;
613                         if(x == SCON)scon = TRUE;
614                         sectbegin = FALSE;
615                         return(freturn(x));
616                 }
617         }
618         /* section three */
619         ptail();
620 # ifdef DEBUG
621         if(debug)
622                 Bprint(&fout,"\n/*this comes from section three - debug */\n");
623 # endif
624         while(getl(buf) && !eof)
625                 Bprint(&fout,"%s\n",(char*)buf);
626         return(freturn(0));
627 }
628 /* end of yylex */
629 # ifdef DEBUG
630 int
631 freturn(int i)
632 {
633         if(yydebug) {
634                 print("now return ");
635                 if(i < NCH) allprint(i);
636                 else print("%d",i);
637                 print("   yylval = ");
638                 switch(i){
639                         case STR: case CCL: case NCCL:
640                                 strpt(yylval.cp);
641                                 break;
642                         case CHAR:
643                                 allprint(yylval.i);
644                                 break;
645                         default:
646                                 print("%d",yylval.i);
647                                 break;
648                 }
649                 print("\n");
650         }
651         return(i);
652 }
653 # endif