22 static Node andstack[NSTACK];
24 static int atorstack[NSTACK];
26 static int cursubid; /* id of current subexpression */
27 static int subidstack[NSTACK]; /* parallel to atorstack */
29 static int lastwasand; /* Last token was operand */
31 static char* exprp; /* pointer to next character in source expression */
34 static Reclass*classp;
37 static wchar_t yyrune; /* last lex'd rune */
38 static Reclass*yyclassp; /* last lex'd class */
40 /* predeclared crap */
41 static void operator(int);
42 static void pushand(Reinst*, Reinst*);
43 static void pushator(int);
44 static void evaluntil(int);
45 static int bldcclass(void);
47 static jmp_buf regkaboom;
54 longjmp(regkaboom, 1);
72 operator(CAT); /* catenate is implicit */
75 if(t == CCLASS || t == NCCLASS)
87 if(t==RBRA && --nbra<0)
88 rcerror("unmatched right paren");
90 if(++cursubid >= NSUBEXP)
91 rcerror ("too many subexpressions");
100 if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
101 lastwasand = TRUE; /* these look like operands */
105 regerr2(char *s, int c)
120 strcpy(buf, "can't happen: ");
126 pushand(Reinst *f, Reinst *l)
128 if(andp >= &andstack[NSTACK])
129 cant("operand stack overflow");
138 if(atorp >= &atorstack[NSTACK])
139 cant("operator stack overflow");
141 *subidp++ = cursubid;
149 if(andp <= &andstack[0]){
150 regerr2("missing operand for ", op);
160 if(atorp <= &atorstack[0])
161 cant("operator stack underflow");
170 Reinst *inst1, *inst2;
172 while(pri==RBRA || atorp[-1]>=pri){
175 rcerror("unknown operator in evaluntil");
177 case LBRA: /* must have been RBRA */
179 inst2 = newinst(RBRA);
180 inst2->r.subid = *subidp;
181 op1->last->l.next = inst2;
182 inst1 = newinst(LBRA);
183 inst1->r.subid = *subidp;
184 inst1->l.next = op1->first;
185 pushand(inst1, inst2);
190 inst2 = newinst(NOP);
191 op2->last->l.next = inst2;
192 op1->last->l.next = inst2;
194 inst1->r.right = op1->first;
195 inst1->l.left = op2->first;
196 pushand(inst1, inst2);
201 op1->last->l.next = op2->first;
202 pushand(op1->first, op2->last);
207 op2->last->l.next = inst1;
208 inst1->r.right = op2->first;
209 pushand(inst1, inst1);
214 op2->last->l.next = inst1;
215 inst1->r.right = op2->first;
216 pushand(op2->first, inst1);
221 inst2 = newinst(NOP);
222 inst1->l.left = inst2;
223 inst1->r.right = op2->first;
224 op2->last->l.next = inst2;
225 pushand(inst1, inst2);
234 Reinst *inst, *target;
240 * get rid of NOOP chains
242 for(inst=pp->firstinst; inst->type!=END; inst++){
243 target = inst->l.next;
244 while(target->type == NOP)
245 target = target->l.next;
246 inst->l.next = target;
250 * The original allocation is for an area larger than
251 * necessary. Reallocate to the actual space used
252 * and then relocate the code.
254 size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
255 npp = realloc(pp, size);
256 if(npp==0 || npp==pp)
258 diff = (char *)npp - (char *)pp;
259 freep = (Reinst *)((char *)freep + diff);
260 for(inst=npp->firstinst; inst<freep; inst++){
268 *(char **)&inst->r.right += diff;
271 *(char **)&inst->l.left += diff;
273 *(char **)&npp->startinst += diff;
283 print("operators\n");
284 for(ip=atorstack; ip<atorp; ip++)
287 for(stk=andstack; stk<andp; stk++)
288 print("0%o\t0%o\n", stk->first->type, stk->last->type);
299 print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
300 l->l.left-pp->firstinst, l->l.right-pp->firstinst);
302 print("\t%C\n", l->r);
303 else if(l->type == CCLASS || l->type == NCCLASS){
305 if(l->type == NCCLASS)
307 for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
311 print("%C-%C", p[0], p[1]);
323 regerr2("too many character classes; limit", NCLASS+'0');
324 return &(classp[nclass++]);
336 n = mbtowc(rp, exprp, MB_CUR_MAX);
341 n = mbtowc(rp, exprp, MB_CUR_MAX);
353 lex(int literal, int dot_type)
357 quoted = nextc(&yyrune);
358 if(literal || quoted){
396 wchar_t *p, *ep, *np;
400 /* we have already seen the '[' */
402 yyclassp = newclass();
404 /* look ahead for negation */
406 quoted = nextc(&rune);
407 if(!quoted && rune == L'^'){
409 quoted = nextc(&rune);
414 /* parse class into a set of spans */
415 for(; ep<&r[NCCRUNE];){
417 rcerror("malformed '[]'");
420 if(!quoted && rune == L']')
422 if(!quoted && rune == L'-'){
424 rcerror("malformed '[]'");
427 quoted = nextc(&rune);
428 if((!quoted && rune == L']') || rune == 0){
429 rcerror("malformed '[]'");
437 quoted = nextc(&rune);
440 /* sort on span start */
441 for(p = r; p < ep; p += 2){
442 for(np = p; np < ep; np += 2)
454 np = yyclassp->spans;
461 for(; p < ep; p += 2)
470 yyclassp->end = np+2;
477 regcomp1(char *s, int literal, int dot_type)
482 /* get memory for the program */
483 pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
485 regerror("out of memory");
488 freep = pp->firstinst;
492 if(setjmp(regkaboom))
495 /* go compile the sucker */
506 /* Start with a low priority operator to prime parser */
508 while((token = lex(literal, dot_type)) != END){
509 if((token&0300) == OPERATOR)
515 /* Close with a low priority operator */
525 rcerror("unmatched left paren");
526 --andp; /* points to first and only operand */
527 pp->startinst = andp->first;
533 print("start: %d\n", andp->first-pp->firstinst);
547 return regcomp1(s, 0, ANY);
553 return regcomp1(s, 1, ANY);
559 return regcomp1(s, 0, ANYNL);