19 /* max character classes per program is nelem(reprog->class) */
20 static Reprog *reprog;
22 /* max rune ranges per character class is nelem(classp->spans)/2 */
23 #define NCCRUNE nelem(classp->spans)
26 static Node andstack[NSTACK];
28 static int atorstack[NSTACK];
30 static int cursubid; /* id of current subexpression */
31 static int subidstack[NSTACK]; /* parallel to atorstack */
33 static int lastwasand; /* Last token was operand */
35 static char* exprp; /* pointer to next character in source expression */
38 static Reclass*classp;
41 static Rune yyrune; /* last lex'd rune */
42 static Reclass*yyclassp; /* last lex'd class */
44 /* predeclared crap */
45 static void operator(int);
46 static void pushand(Reinst*, Reinst*);
47 static void pushator(int);
48 static void evaluntil(int);
49 static int bldcclass(void);
51 static jmp_buf regkaboom;
58 longjmp(regkaboom, 1);
76 operator(CAT); /* catenate is implicit */
79 if(t == CCLASS || t == NCCLASS)
91 if(t==RBRA && --nbra<0)
92 rcerror("unmatched right paren");
94 if(++cursubid >= NSUBEXP)
95 rcerror ("too many subexpressions");
104 if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
105 lastwasand = TRUE; /* these look like operands */
109 regerr2(char *s, int c)
124 strcpy(buf, "can't happen: ");
130 pushand(Reinst *f, Reinst *l)
132 if(andp >= &andstack[NSTACK])
133 cant("operand stack overflow");
142 if(atorp >= &atorstack[NSTACK])
143 cant("operator stack overflow");
145 *subidp++ = cursubid;
153 if(andp <= &andstack[0]){
154 regerr2("missing operand for ", op);
164 if(atorp <= &atorstack[0])
165 cant("operator stack underflow");
174 Reinst *inst1, *inst2;
176 while(pri==RBRA || atorp[-1]>=pri){
179 rcerror("unknown operator in evaluntil");
181 case LBRA: /* must have been RBRA */
183 inst2 = newinst(RBRA);
184 inst2->subid = *subidp;
185 op1->last->next = inst2;
186 inst1 = newinst(LBRA);
187 inst1->subid = *subidp;
188 inst1->next = op1->first;
189 pushand(inst1, inst2);
194 inst2 = newinst(NOP);
195 op2->last->next = inst2;
196 op1->last->next = inst2;
198 inst1->right = op1->first;
199 inst1->left = op2->first;
200 pushand(inst1, inst2);
205 op1->last->next = op2->first;
206 pushand(op1->first, op2->last);
211 op2->last->next = inst1;
212 inst1->right = op2->first;
213 pushand(inst1, inst1);
218 op2->last->next = inst1;
219 inst1->right = op2->first;
220 pushand(op2->first, inst1);
225 inst2 = newinst(NOP);
227 inst1->right = op2->first;
228 op2->last->next = inst2;
229 pushand(inst1, inst2);
238 Reinst *inst, *target;
245 * get rid of NOOP chains
247 for(inst=pp->firstinst; inst->type!=END; inst++){
249 while(target->type == NOP)
250 target = target->next;
255 * The original allocation is for an area larger than
256 * necessary. Reallocate to the actual space used
257 * and then relocate the code.
259 size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
260 npp = realloc(pp, size);
261 if(npp==0 || npp==pp)
263 diff = (char *)npp - (char *)pp;
264 freep = (Reinst *)((char *)freep + diff);
265 for(inst=npp->firstinst; inst<freep; inst++){
271 *(char **)&inst->right += diff;
275 *(char **)&inst->right += diff;
277 *(char **)&cl->end += diff;
280 *(char **)&inst->left += diff;
282 *(char **)&npp->startinst += diff;
292 print("operators\n");
293 for(ip=atorstack; ip<atorp; ip++)
296 for(stk=andstack; stk<andp; stk++)
297 print("0%o\t0%o\n", stk->first->type, stk->last->type);
308 print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
309 l->left-pp->firstinst, l->right-pp->firstinst);
311 print("\t%C\n", l->r);
312 else if(l->type == CCLASS || l->type == NCCLASS){
314 if(l->type == NCCLASS)
316 for(p = l->cp->spans; p < l->cp->end; p += 2)
320 print("%C-%C", p[0], p[1]);
331 if(nclass >= nelem(reprog->class))
332 rcerror("too many character classes; increase Reprog.class size");
333 return &(classp[nclass++]);
343 exprp += chartorune(rp, exprp);
345 exprp += chartorune(rp, exprp);
354 lex(int literal, int dot_type)
358 quoted = nextc(&yyrune);
359 if(literal || quoted){
401 /* we have already seen the '[' */
403 yyclassp = newclass();
405 /* look ahead for negation */
406 /* SPECIAL CASE!!! negated classes don't match \n */
408 quoted = nextc(&rune);
409 if(!quoted && rune == L'^'){
411 quoted = nextc(&rune);
416 /* parse class into a set of spans */
417 while(ep < &r[NCCRUNE-1]){
419 rcerror("malformed '[]'");
422 if(!quoted && rune == L']')
424 if(!quoted && rune == L'-'){
426 rcerror("malformed '[]'");
429 quoted = nextc(&rune);
430 if((!quoted && rune == L']') || rune == 0){
431 rcerror("malformed '[]'");
439 quoted = nextc(&rune);
441 if(ep >= &r[NCCRUNE-1]) {
442 rcerror("char class too large; increase Reclass.spans size");
446 /* sort on span start */
447 for(p = r; p < ep; p += 2){
448 for(np = p; np < ep; np += 2)
460 np = yyclassp->spans;
467 for(; p < ep; p += 2)
468 /* overlapping or adjacent ranges? */
469 if(p[0] <= np[1] + 1){
471 np[1] = p[1]; /* coalesce */
477 yyclassp->end = np+2;
484 regcomp1(char *s, int literal, int dot_type)
489 /* get memory for the program */
490 pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
492 regerror("out of memory");
495 freep = pp->firstinst;
499 if(setjmp(regkaboom))
502 /* go compile the sucker */
513 /* Start with a low priority operator to prime parser */
515 while((token = lex(literal, dot_type)) != END){
516 if((token&0300) == OPERATOR)
522 /* Close with a low priority operator */
532 rcerror("unmatched left paren");
533 --andp; /* points to first and only operand */
534 pp->startinst = andp->first;
540 print("start: %d\n", andp->first-pp->firstinst);
554 return regcomp1(s, 0, ANY);
560 return regcomp1(s, 1, ANY);
566 return regcomp1(s, 0, ANYNL);