]> git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/libregexp/regcomp.c
libregexp: miscellaneous little cleanups
[plan9front.git] / sys / src / libregexp / regcomp.c
index e48ef15acb58b69f089100ce2bad237316e1d544..094f947794c3e6b9b275d6ce64c8fdc7881dbee4 100644 (file)
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-#define        TRUE    1
-#define        FALSE   0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
-       Reinst* first;
-       Reinst* last;
-}Node;
-
-/* max character classes per program is nelem(reprog->class) */
-static Reprog  *reprog;
-
-/* max rune ranges per character class is nelem(classp->spans)/2 */
-#define NCCRUNE        nelem(classp->spans)
-
-#define        NSTACK  20
-static Node    andstack[NSTACK];
-static Node    *andp;
-static int     atorstack[NSTACK];
-static int*    atorp;
-static int     cursubid;               /* id of current subexpression */
-static int     subidstack[NSTACK];     /* parallel to atorstack */
-static int*    subidp;
-static int     lastwasand;     /* Last token was operand */
-static int     nbra;
-static char*   exprp;          /* pointer to next character in source expression */
-static int     lexdone;
-static int     nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int     errors;
-static Rune    yyrune;         /* last lex'd rune */
-static Reclass*yyclassp;       /* last lex'd class */
-
-/* predeclared crap */
-static void    operator(int);
-static void    pushand(Reinst*, Reinst*);
-static void    pushator(int);
-static void    evaluntil(int);
-static int     bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static void
-rcerror(char *s)
+#include <regexp.h>
+#include "regimpl.h"
+
+int instrcnt[] = {
+       [TANY]    2,
+       [TBOL]    1,
+       [TCAT]    0,
+       [TCLASS]  1,
+       [TEOL]    1,
+       [TNOTNL]  1,
+       [TOR]     2,
+       [TPLUS]   1,
+       [TQUES]   1,
+       [TRUNE]   1,
+       [TSTAR]   2,
+       [TSUB]    2
+};
+
+static Renode*
+node(Parselex *plex, int op, Renode *l, Renode *r)
 {
-       errors++;
-       regerror(s);
-       longjmp(regkaboom, 1);
+       Renode *n;
+
+       plex->instrs += instrcnt[op];
+       n = plex->next++;
+       n->op = op;
+       n->left = l;
+       n->right = r;
+       return n;
 }
 
-static Reinst*
-newinst(int t)
+static Renode*
+e3(Parselex *plex)
 {
-       freep->type = t;
-       freep->left = 0;
-       freep->right = 0;
-       return freep++;
+       char error[128];
+       Renode *n;
+
+       switch(lex(plex)) {
+       case LANY:
+               return node(plex, TANY, nil, nil);
+       case LBOL:
+               return node(plex, TBOL, nil, nil);
+       case LEOL:
+               return node(plex, TEOL, nil, nil);
+       case LRUNE:
+               n = node(plex, TRUNE, nil, nil);
+               n->r = plex->rune;
+               return n;
+       case LCLASS:
+               if(plex->nc)
+                       return buildclassn(plex);
+               return buildclass(plex);
+       case LLPAR:
+               n = e0(plex);
+               n = node(plex, TSUB, n, nil);
+               if(lex(plex) != LRPAR) {
+                       snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
+                       regerror(error);
+                       longjmp(plex->exitenv, 1);
+               }
+               return n;
+       default:
+               if(plex->rune)
+                       snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
+               else
+                       snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
+               regerror(error);
+               longjmp(plex->exitenv, 1);
+       }
+       return nil;
 }
 
-static void
-operand(int t)
+static Renode*
+e2(Parselex *plex)
 {
-       Reinst *i;
-
-       if(lastwasand)
-               operator(CAT);  /* catenate is implicit */
-       i = newinst(t);
-
-       if(t == CCLASS || t == NCCLASS)
-               i->cp = yyclassp;
-       if(t == RUNE)
-               i->r = yyrune;
+       Renode *n;
 
-       pushand(i, i);
-       lastwasand = TRUE;
+       n = e3(plex);
+       while(lex(plex) == LREP) {
+               switch(plex->rune) {
+               case L'*':
+                       n = node(plex, TSTAR, n, nil);
+                       break;
+               case L'+':
+                       n = node(plex, TPLUS, n, nil);
+                       break;
+               case L'?':
+                       n = node(plex, TQUES, n, nil);
+                       break;
+               }
+       }
+       plex->peek = 1;
+       return n;
 }
 
-static void
-operator(int t)
+static Renode*
+invert(Renode *n)
 {
-       if(t==RBRA && --nbra<0)
-               rcerror("unmatched right paren");
-       if(t==LBRA){
-               if(++cursubid >= NSUBEXP)
-                       rcerror ("too many subexpressions");
-               nbra++;
-               if(lastwasand)
-                       operator(CAT);
-       } else
-               evaluntil(t);
-       if(t != RBRA)
-               pushator(t);
-       lastwasand = FALSE;
-       if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
-               lastwasand = TRUE;      /* these look like operands */
+       Renode *n1;
+
+       if(n->op != TCAT)
+               return n;
+       while(n->left->op == TCAT) {
+               n1 = n->left;
+               n->left = n1->right;
+               n1->right = n;
+               n = n1;
+       }
+       return n;
 }
 
-static void
-regerr2(char *s, int c)
+static Renode*
+e1(Parselex *plex)
 {
-       char buf[100];
-       char *cp = buf;
-       while(*s)
-               *cp++ = *s++;
-       *cp++ = c;
-       *cp = '\0'; 
-       rcerror(buf);
-}
+       Renode *n;
+       int sym;
 
-static void
-cant(char *s)
-{
-       char buf[100];
-       strcpy(buf, "can't happen: ");
-       strcat(buf, s);
-       rcerror(buf);
+       n = e2(plex);
+       for(;;) {
+               sym = lex(plex);
+               if(sym == LEND || sym == LOR || sym == LRPAR)
+                       break;
+               plex->peek = 1;
+               n = node(plex, TCAT, n, e2(plex));
+       }
+       plex->peek = 1;
+       return invert(n);
 }
 
-static void
-pushand(Reinst *f, Reinst *l)
+static Renode*
+e0(Parselex *plex)
 {
-       if(andp >= &andstack[NSTACK])
-               cant("operand stack overflow");
-       andp->first = f;
-       andp->last = l;
-       andp++;
+       Renode *n;
+
+       n = e1(plex);
+       for(;;) {
+               if(lex(plex) != LOR)
+                       break;
+               n = node(plex, TOR, n, e1(plex));
+       }
+       plex->peek = 1;
+       return n;
 }
 
-static void
-pushator(int t)
+static Parselex*
+initplex(Parselex *plex, char *regstr, int lit)
 {
-       if(atorp >= &atorstack[NSTACK])
-               cant("operator stack overflow");
-       *atorp++ = t;
-       *subidp++ = cursubid;
+       plex->getnextr = lit ? getnextrlit : getnextr;
+       plex->rawexp = plex->orig = regstr;
+       plex->sub = 0;
+       plex->instrs = 0;
+       plex->peek = 0;
+       plex->done = 0;
+       return plex;
 }
 
-static Node*
-popand(int op)
+static Reprog*
+regcomp1(char *regstr, int nl, int lit)
 {
-       Reinst *inst;
-
-       if(andp <= &andstack[0]){
-               regerr2("missing operand for ", op);
-               inst = newinst(NOP);
-               pushand(inst,inst);
+       Reprog *reprog;
+       Parselex plex;
+       Renode *parsetr;
+       int regstrlen, maxthr;
+
+       regstrlen = utflen(regstr);
+       initplex(&plex, regstr, lit);
+       plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
+       if(plex.nodes == nil)
+               return nil;
+       plex.next = plex.nodes;
+
+       if(setjmp(plex.exitenv) != 0) {
+               free(plex.nodes);
+               return nil;
        }
-       return --andp;
-}
 
-static int
-popator(void)
-{
-       if(atorp <= &atorstack[0])
-               cant("operator stack underflow");
-       --subidp;
-       return *--atorp;
+       maxthr = regstrlen + 1;
+       parsetr = node(&plex, TSUB, e0(&plex), nil);
+
+//     prtree(parsetr, 0, 1);
+       reprog = malloc(sizeof(Reprog) +
+                       sizeof(Reinst) * plex.instrs +
+                       sizeof(Rethread) * maxthr);
+       reprog->len = plex.instrs;
+       reprog->nthr = maxthr;
+       reprog->startinst = compile(parsetr, reprog, nl);
+       reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
+       reprog->regstr = regstr;
+
+       free(plex.nodes);
+       return reprog;
 }
 
-static void
-evaluntil(int pri)
+Reprog*
+regcomp(char *str)
 {
-       Node *op1, *op2;
-       Reinst *inst1, *inst2;
-
-       while(pri==RBRA || atorp[-1]>=pri){
-               switch(popator()){
-               default:
-                       rcerror("unknown operator in evaluntil");
-                       break;
-               case LBRA:              /* must have been RBRA */
-                       op1 = popand('(');
-                       inst2 = newinst(RBRA);
-                       inst2->subid = *subidp;
-                       op1->last->next = inst2;
-                       inst1 = newinst(LBRA);
-                       inst1->subid = *subidp;
-                       inst1->next = op1->first;
-                       pushand(inst1, inst2);
-                       return;
-               case OR:
-                       op2 = popand('|');
-                       op1 = popand('|');
-                       inst2 = newinst(NOP);
-                       op2->last->next = inst2;
-                       op1->last->next = inst2;
-                       inst1 = newinst(OR);
-                       inst1->right = op1->first;
-                       inst1->left = op2->first;
-                       pushand(inst1, inst2);
-                       break;
-               case CAT:
-                       op2 = popand(0);
-                       op1 = popand(0);
-                       op1->last->next = op2->first;
-                       pushand(op1->first, op2->last);
-                       break;
-               case STAR:
-                       op2 = popand('*');
-                       inst1 = newinst(OR);
-                       op2->last->next = inst1;
-                       inst1->right = op2->first;
-                       pushand(inst1, inst1);
-                       break;
-               case PLUS:
-                       op2 = popand('+');
-                       inst1 = newinst(OR);
-                       op2->last->next = inst1;
-                       inst1->right = op2->first;
-                       pushand(op2->first, inst1);
-                       break;
-               case QUEST:
-                       op2 = popand('?');
-                       inst1 = newinst(OR);
-                       inst2 = newinst(NOP);
-                       inst1->left = inst2;
-                       inst1->right = op2->first;
-                       op2->last->next = inst2;
-                       pushand(inst1, inst2);
-                       break;
-               }
-       }
+       return regcomp1(str, 0, 0);
 }
 
-static Reprog*
-optimize(Reprog *pp)
+Reprog*
+regcomplit(char *str)
 {
-       Reinst *inst, *target;
-       int size;
-       Reprog *npp;
-       Reclass *cl;
-       int diff;
-
-       /*
-        *  get rid of NOOP chains
-        */
-       for(inst=pp->firstinst; inst->type!=END; inst++){
-               target = inst->next;
-               while(target->type == NOP)
-                       target = target->next;
-               inst->next = target;
-       }
-
-       /*
-        *  The original allocation is for an area larger than
-        *  necessary.  Reallocate to the actual space used
-        *  and then relocate the code.
-        */
-       size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
-       npp = realloc(pp, size);
-       if(npp==0 || npp==pp)
-               return pp;
-       diff = (char *)npp - (char *)pp;
-       freep = (Reinst *)((char *)freep + diff);
-       for(inst=npp->firstinst; inst<freep; inst++){
-               switch(inst->type){
-               case OR:
-               case STAR:
-               case PLUS:
-               case QUEST:
-                       *(char **)&inst->right += diff;
-                       break;
-               case CCLASS:
-               case NCCLASS:
-                       *(char **)&inst->right += diff;
-                       cl = inst->cp;
-                       *(char **)&cl->end += diff;
-                       break;
-               }
-               *(char **)&inst->left += diff;
-       }
-       *(char **)&npp->startinst += diff;
-       return npp;
+       return regcomp1(str, 0, 1);
 }
 
-#ifdef DEBUG
-static void
-dumpstack(void){
-       Node *stk;
-       int *ip;
-
-       print("operators\n");
-       for(ip=atorstack; ip<atorp; ip++)
-               print("0%o\n", *ip);
-       print("operands\n");
-       for(stk=andstack; stk<andp; stk++)
-               print("0%o\t0%o\n", stk->first->type, stk->last->type);
+Reprog*
+regcompnl(char *str)
+{
+       return regcomp1(str, 1, 0);
 }
 
-static void
-dump(Reprog *pp)
+static Reinst*
+compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
 {
-       Reinst *l;
-       Rune *p;
-
-       l = pp->firstinst;
-       do{
-               print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
-                       l->left-pp->firstinst, l->right-pp->firstinst);
-               if(l->type == RUNE)
-                       print("\t%C\n", l->r);
-               else if(l->type == CCLASS || l->type == NCCLASS){
-                       print("\t[");
-                       if(l->type == NCCLASS)
-                               print("^");
-                       for(p = l->cp->spans; p < l->cp->end; p += 2)
-                               if(p[0] == p[1])
-                                       print("%C", p[0]);
-                               else
-                                       print("%C-%C", p[0], p[1]);
-                       print("]\n");
-               } else
-                       print("\n");
-       }while(l++->type);
+       Reinst *i;
+       int s;
+
+Tailcall:
+       if(renode == nil)
+               return reinst;
+       switch(renode->op) {
+       case TCLASS:
+               reinst->op = OCLASS;
+               reinst->r = renode->r;
+               reinst->r1 = renode->r1;
+               reinst->a = reinst + 1 + renode->nclass;
+               renode = renode->left;
+               reinst++;
+               goto Tailcall;
+       case TCAT:
+               reinst = compile1(renode->left, reinst, sub, nl);
+               renode = renode->right;
+               goto Tailcall;
+       case TOR:
+               reinst->op = OSPLIT;
+               reinst->a = reinst + 1;
+               i = compile1(renode->left, reinst->a, sub, nl);
+               reinst->b = i + 1;
+               i->op = OJMP;
+               i->a = compile1(renode->right, reinst->b, sub, nl);
+               return i->a;
+       case TSTAR:
+               reinst->op = OSPLIT;
+               reinst->a = reinst + 1;
+               i = compile1(renode->left, reinst->a, sub, nl);
+               reinst->b = i + 1;
+               i->op = OJMP;
+               i->a = reinst;
+               return reinst->b;
+       case TPLUS:
+               i = reinst;
+               reinst = compile1(renode->left, reinst, sub, nl);
+               reinst->op = OSPLIT;
+               reinst->a = i;
+               reinst->b = reinst + 1;
+               return reinst->b;
+       case TQUES:
+               reinst->op = OSPLIT;
+               reinst->a = reinst + 1;
+               reinst->b = compile1(renode->left, reinst->a, sub, nl);
+               return reinst->b;
+       case TSUB:
+               reinst->op = OSAVE;
+               reinst->sub = s = (*sub)++;
+               reinst = compile1(renode->left, reinst+1, sub, nl);
+               reinst->op = OUNSAVE;
+               reinst->sub = s;
+               return reinst + 1;
+       case TANY:
+               if(nl == 0)
+                       reinst++->op = ONOTNL;
+               reinst->op = OANY;
+               return reinst + 1;
+       case TRUNE:
+               reinst->op = ORUNE;
+               reinst->r = renode->r;
+               return reinst + 1;
+       case TNOTNL:
+               reinst->op = ONOTNL;
+               return reinst + 1;
+       case TEOL:
+               reinst->op = OEOL;
+               return reinst + 1;
+       case TBOL:
+               reinst->op = OBOL;
+               return reinst + 1;
+       }
+       return nil;
 }
-#endif
 
-static Reclass*
-newclass(void)
+static Reinst*
+compile(Renode *parsetr, Reprog *reprog, int nl)
 {
-       if(nclass >= nelem(reprog->class))
-               rcerror("too many character classes; increase Reprog.class size");
-       return &(classp[nclass++]);
+       Reinst *reinst, *end;
+       int sub;
+
+       sub = 0;
+       reinst = (Reinst*)(reprog+1);
+       end = compile1(parsetr, reinst, &sub, nl);
+       assert(end <= reinst + reprog->len);
+       return reinst;
 }
 
-static int
-nextc(Rune *rp)
+static void
+getnextr(Parselex *l)
 {
-       if(lexdone){
-               *rp = 0;
-               return 1;
+       l->literal = 0;
+       if(l->done) {
+               l->rune = L'\0';
+               return;
        }
-       exprp += chartorune(rp, exprp);
-       if(*rp == L'\\'){
-               exprp += chartorune(rp, exprp);
-               return 1;
+       l->rawexp += chartorune(&l->rune, l->rawexp);
+       if(l->rune == L'\\') {
+               l->rawexp += chartorune(&l->rune, l->rawexp);
+               l->literal = 1;
        }
-       if(*rp == 0)
-               lexdone = 1;
-       return 0;
+       if(*l->rawexp == 0)
+               l->done = 1;
+       return;
 }
 
-static int
-lex(int literal, int dot_type)
+static void
+getnextrlit(Parselex *l)
 {
-       int quoted;
-
-       quoted = nextc(&yyrune);
-       if(literal || quoted){
-               if(yyrune == 0)
-                       return END;
-               return RUNE;
+       l->literal = 1;
+       if(l->done) {
+               l->literal = 0;
+               l->rune = L'\0';
+               return;
        }
+       l->rawexp += chartorune(&l->rune, l->rawexp);
+       if(*l->rawexp == 0)
+               l->done = 1;
+       return;
+}
 
-       switch(yyrune){
-       case 0:
-               return END;
+static int
+lex(Parselex *l)
+{
+       if(l->peek) {
+               l->peek = 0;
+               return l->peeklex;
+       }
+       l->getnextr(l);
+       if(l->literal)
+               return l->peeklex = LRUNE;
+       switch(l->rune){
+       case L'\0':
+               return l->peeklex = LEND;
        case L'*':
-               return STAR;
        case L'?':
-               return QUEST;
        case L'+':
-               return PLUS;
+               return l->peeklex = LREP;
        case L'|':
-               return OR;
+               return l->peeklex = LOR;
        case L'.':
-               return dot_type;
+               return l->peeklex = LANY;
        case L'(':
-               return LBRA;
+               return l->peeklex = LLPAR;
        case L')':
-               return RBRA;
+               return l->peeklex = LRPAR;
        case L'^':
-               return BOL;
+               return l->peeklex = LBOL;
        case L'$':
-               return EOL;
+               return l->peeklex = LEOL;
        case L'[':
-               return bldcclass();
+               getclass(l);
+               return l->peeklex = LCLASS;
        }
-       return RUNE;
+       return l->peeklex = LRUNE;
 }
 
 static int
-bldcclass(void)
+pcmp(void *va, void *vb)
 {
-       int type;
-       Rune r[NCCRUNE];
-       Rune *p, *ep, *np;
-       Rune rune;
-       int quoted;
-
-       /* we have already seen the '[' */
-       type = CCLASS;
-       yyclassp = newclass();
-
-       /* look ahead for negation */
-       /* SPECIAL CASE!!! negated classes don't match \n */
-       ep = r;
-       quoted = nextc(&rune);
-       if(!quoted && rune == L'^'){
-               type = NCCLASS;
-               quoted = nextc(&rune);
-               *ep++ = L'\n';
-               *ep++ = L'\n';
-       }
-
-       /* parse class into a set of spans */
-       while(ep < &r[NCCRUNE-1]){
-               if(rune == 0){
-                       rcerror("malformed '[]'");
-                       return 0;
-               }
-               if(!quoted && rune == L']')
-                       break;
-               if(!quoted && rune == L'-'){
-                       if(ep == r){
-                               rcerror("malformed '[]'");
-                               return 0;
-                       }
-                       quoted = nextc(&rune);
-                       if((!quoted && rune == L']') || rune == 0){
-                               rcerror("malformed '[]'");
-                               return 0;
-                       }
-                       *(ep-1) = rune;
-               } else {
-                       *ep++ = rune;
-                       *ep++ = rune;
-               }
-               quoted = nextc(&rune);
-       }
-       if(ep >= &r[NCCRUNE-1]) {
-               rcerror("char class too large; increase Reclass.spans size");
-               return 0;
-       }
+       Rune *a, *b;
 
-       /* sort on span start */
-       for(p = r; p < ep; p += 2){
-               for(np = p; np < ep; np += 2)
-                       if(*np < *p){
-                               rune = np[0];
-                               np[0] = p[0];
-                               p[0] = rune;
-                               rune = np[1];
-                               np[1] = p[1];
-                               p[1] = rune;
-                       }
-       }
-
-       /* merge spans */
-       np = yyclassp->spans;
-       p = r;
-       if(r == ep)
-               yyclassp->end = np;
-       else {
-               np[0] = *p++;
-               np[1] = *p++;
-               for(; p < ep; p += 2)
-                       /* overlapping or adjacent ranges? */
-                       if(p[0] <= np[1] + 1){
-                               if(p[1] >= np[1])
-                                       np[1] = p[1];   /* coalesce */
-                       } else {
-                               np += 2;
-                               np[0] = p[0];
-                               np[1] = p[1];
-                       }
-               yyclassp->end = np+2;
-       }
+       a = va;
+       b = vb;
 
-       return type;
+       if(a[0] < b[0])
+               return 1;
+       if(a[0] > b[0])
+               return -1;
+       if(a[1] < b[1])
+               return 1;
+       if(a[1] > b[1])
+               return -1;
+       return 0;
 }
 
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
+static void
+getclass(Parselex *l)
 {
-       int token;
-       Reprog *pp;
-
-       /* get memory for the program */
-       pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
-       if(pp == 0){
-               regerror("out of memory");
-               return 0;
+       Rune *p, *q, t;
+
+       l->nc = 0;
+       getnextrlit(l);
+       if(l->rune == L'^') {
+               l->nc = 1;
+               getnextrlit(l);
        }
-       freep = pp->firstinst;
-       classp = pp->class;
-       errors = 0;
-
-       if(setjmp(regkaboom))
-               goto out;
-
-       /* go compile the sucker */
-       lexdone = 0;
-       exprp = s;
-       nclass = 0;
-       nbra = 0;
-       atorp = atorstack;
-       andp = andstack;
-       subidp = subidstack;
-       lastwasand = FALSE;
-       cursubid = 0;
-
-       /* Start with a low priority operator to prime parser */
-       pushator(START-1);
-       while((token = lex(literal, dot_type)) != END){
-               if((token&0300) == OPERATOR)
-                       operator(token);
-               else
-                       operand(token);
+       p = l->cpairs;
+       for(;;) {
+               *p = l->rune;
+               if(l->rune == L']')
+                       break;
+               if(l->rune == L'-') {
+                       regerror("Malformed class");
+                       longjmp(l->exitenv, 1);
+               }
+               if(l->rune == '\\') {
+                       getnextrlit(l);
+                       *p = l->rune;
+               }
+               if(l->rune == 0) {
+                       regerror("No closing ] for class");
+                       longjmp(l->exitenv, 1);
+               }
+               getnextrlit(l);
+               if(l->rune == L'-') {
+                       getnextrlit(l);
+                       if(l->rune == L']') {
+                               regerror("Malformed class");
+                               longjmp(l->exitenv, 1);
+                       }
+                       if(l->rune == L'-') {
+                               regerror("Malformed class");
+                               longjmp(l->exitenv, 1);
+                       }
+                       if(l->rune == L'\\')
+                               getnextrlit(l);
+                       p[1] = l->rune;
+                       if(p[0] > p[1]) {
+                               t = p[0];
+                               p[0] = p[1];
+                               p[1] = t;
+                       }
+                       getnextrlit(l);
+               } else
+                       p[1] = p[0];
+               if(p >= l->cpairs + nelem(l->cpairs) - 2) {
+                       regerror("Class too big\n");
+                       longjmp(l->exitenv, 1);
+               }
+               p += 2;
        }
-
-       /* Close with a low priority operator */
-       evaluntil(START);
-
-       /* Force END */
-       operand(END);
-       evaluntil(START);
-#ifdef DEBUG
-       dumpstack();
-#endif
-       if(nbra)
-               rcerror("unmatched left paren");
-       --andp; /* points to first and only operand */
-       pp->startinst = andp->first;
-#ifdef DEBUG
-       dump(pp);
-#endif
-       pp = optimize(pp);
-#ifdef DEBUG
-       print("start: %d\n", andp->first-pp->firstinst);
-       dump(pp);
-#endif
-out:
-       if(errors){
-               free(pp);
-               pp = 0;
+       *p = L'\0';
+       qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
+       q = l->cpairs;
+       for(p = l->cpairs+2; *p != 0; p += 2) {
+               if(p[1] < q[0] - 1) {
+                       q[2] = p[0];
+                       q[3] = p[1];
+                       q += 2;
+                       continue;
+               }
+               q[0] = p[0];
+               if(p[1] > q[1])
+                       q[1] = p[1];
        }
-       return pp;
+       q[2] = 0;
 }
 
-extern Reprog*
-regcomp(char *s)
+/* classes are in descending order see pcmp */
+static Renode*
+buildclassn(Parselex *l)
 {
-       return regcomp1(s, 0, ANY);
+       Renode *n;
+       Rune *p;
+       int i;
+
+       i = 0;
+       p = l->cpairs;
+       n = node(l, TCLASS, nil, nil);
+       n->r = p[1] + 1;
+       n->r1 = Runemax;
+       n->nclass = i++;
+
+       for(; *p != 0; p += 2) {
+               n = node(l, TCLASS, n, nil);
+               n->r = p[3] + 1;
+               n->r1 = p[0] - 1;
+               n->nclass = i++;
+       }
+       n->r = 0;
+       return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
 }
 
-extern Reprog*
-regcomplit(char *s)
+static Renode*
+buildclass(Parselex *l)
 {
-       return regcomp1(s, 1, ANY);
+       Renode *n;
+       Rune *p;
+       int i;
+
+       i = 0;
+       n = node(l, TCLASS, nil, nil);
+       n->r = Runemax + 1;
+       n->nclass = i++;
+
+       for(p = l->cpairs; *p != 0; p += 2) {
+               n = node(l, TCLASS, n, nil);
+               n->r = p[0];
+               n->r1 = p[1];
+               n->nclass = i++;
+       }
+       return n;
 }
 
-extern Reprog*
-regcompnl(char *s)
+static void
+prtree(Renode *tree, int d, int f)
 {
-       return regcomp1(s, 0, ANYNL);
+       int i;
+
+       if(tree == nil)
+               return;
+       if(f)
+       for(i = 0; i < d; i++)
+               print("\t");
+       switch(tree->op) {
+       case TCAT:
+               prtree(tree->left, d, 0);
+               prtree(tree->right, d, 1);
+               break;
+       case TOR:
+               print("TOR\n");
+               prtree(tree->left, d+1, 1);
+               for(i = 0; i < d; i++)
+                       print("\t");
+               print("|\n");
+               prtree(tree->right, d+1, 1);
+               break;
+       case TSTAR:
+               print("*\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TPLUS:
+               print("+\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TQUES:
+               print("?\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TANY:
+               print(".\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TBOL:
+               print("^\n");
+               break;
+       case TEOL:
+               print("$\n");
+               break;
+       case TSUB:
+               print("TSUB\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TRUNE:
+               print("TRUNE: %C\n", tree->r);
+               break;
+       case TNOTNL:
+               print("TNOTNL: !\\n\n");
+               break;
+       case TCLASS:
+               print("CLASS: %C-%C\n", tree->r, tree->r1);
+               prtree(tree->left, d, 1);
+               break;
+       }
 }