22 node(Parselex *plex, int op, Renode *l, Renode *r)
26 plex->instrs += instrcnt[op];
42 return node(plex, TANY, nil, nil);
44 return node(plex, TBOL, nil, nil);
46 return node(plex, TEOL, nil, nil);
48 n = node(plex, TRUNE, nil, nil);
53 return buildclassn(plex);
54 return buildclass(plex);
57 n = node(plex, TSUB, n, nil);
58 if(lex(plex) != LRPAR) {
59 snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
61 longjmp(plex->exitenv, 1);
66 snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
68 snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
70 longjmp(plex->exitenv, 1);
81 if(lex(plex) == LREP) {
84 return node(plex, TSTAR, n, nil);
86 return node(plex, TPLUS, n, nil);
88 return node(plex, TQUES, n, nil);
102 while(n->left->op == TCAT) {
120 if(sym == LEND || sym == LOR || sym == LRPAR)
123 n = node(plex, TCAT, n, e2(plex));
138 n = node(plex, TOR, n, e1(plex));
145 initplex(Parselex *plex, char *regstr, int lit)
147 plex->getnextr = lit ? getnextrlit : getnextr;
148 plex->rawexp = plex->orig = regstr;
157 maxthreads(Renode *tree)
168 regcomp1(char *regstr, int nl, int lit)
173 int regstrlen, maxthr;
175 regstrlen = utflen(regstr);
176 initplex(&plex, regstr, lit);
177 plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
178 if(plex.nodes == nil)
180 plex.next = plex.nodes;
182 if(setjmp(plex.exitenv) != 0) {
188 parsetr = node(&plex, TSUB, e0(&plex), nil);
190 // prtree(parsetr, 0, 1);
191 reprog = malloc(sizeof(Reprog) +
192 sizeof(Reinst) * plex.instrs +
193 sizeof(Rethread) * maxthr);
194 reprog->len = plex.instrs;
195 reprog->nthr = maxthr;
196 reprog->startinst = compile(parsetr, reprog, nl);
197 reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
198 reprog->regstr = regstr;
207 return regcomp1(str, 0, 0);
211 regcomplit(char *str)
213 return regcomp1(str, 0, 1);
219 return regcomp1(str, 1, 0);
223 compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
234 reinst->r = renode->r;
235 reinst->r1 = renode->r1;
236 reinst->a = reinst + 1 + renode->nclass;
237 renode = renode->left;
241 reinst = compile1(renode->left, reinst, sub, nl);
242 renode = renode->right;
246 reinst->a = reinst + 1;
247 i = compile1(renode->left, reinst->a, sub, nl);
250 i->a = compile1(renode->right, reinst->b, sub, nl);
254 reinst->a = reinst + 1;
255 i = compile1(renode->left, reinst->a, sub, nl);
262 reinst = compile1(renode->left, reinst, sub, nl);
265 reinst->b = reinst + 1;
269 reinst->a = reinst + 1;
270 reinst->b = compile1(renode->left, reinst->a, sub, nl);
274 reinst->sub = s = (*sub)++;
275 reinst = compile1(renode->left, reinst+1, sub, nl);
276 reinst->op = OUNSAVE;
281 reinst++->op = ONOTNL;
286 reinst->r = renode->r;
302 compile(Renode *parsetr, Reprog *reprog, int nl)
308 reinst = (Reinst*)(reprog+1);
309 compile1(parsetr, reinst, &sub, nl);
314 getnextr(Parselex *l)
321 l->rawexp += chartorune(&l->rune, l->rawexp);
322 if(l->rune == L'\\') {
323 l->rawexp += chartorune(&l->rune, l->rawexp);
332 getnextrlit(Parselex *l)
340 l->rawexp += chartorune(&l->rune, l->rawexp);
355 return l->peeklex = LRUNE;
358 return l->peeklex = LEND;
362 return l->peeklex = LREP;
364 return l->peeklex = LOR;
366 return l->peeklex = LANY;
368 return l->peeklex = LLPAR;
370 return l->peeklex = LRPAR;
372 return l->peeklex = LBOL;
374 return l->peeklex = LEOL;
377 return l->peeklex = LCLASS;
379 return l->peeklex = LRUNE;
383 pcmp(void *va, void *vb)
391 n = (vlong)b[0] - (vlong)a[0];
394 return (vlong)b[1] - (vlong)a[1];
398 getclass(Parselex *l)
404 if(l->rune == L'^') {
413 if(l->rune == L'-') {
414 regerror("Malformed class");
415 longjmp(l->exitenv, 1);
417 if(l->rune == '\\') {
422 regerror("No closing ] for class");
423 longjmp(l->exitenv, 1);
426 if(l->rune == L'-') {
428 if(l->rune == L']') {
429 regerror("Malformed class");
430 longjmp(l->exitenv, 1);
432 if(l->rune == L'-') {
433 regerror("Malformed class");
434 longjmp(l->exitenv, 1);
447 if(p >= l->cpairs + nelem(l->cpairs) - 2) {
448 regerror("Class too big\n");
449 longjmp(l->exitenv, 1);
454 qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
456 for(p = l->cpairs+2; *p != 0; p += 2) {
457 if(p[1] < q[0] - 1) {
470 /* classes are in descending order */
472 buildclassn(Parselex *l)
480 n = node(l, TCLASS, nil, nil);
485 for(; *p != 0; p += 2) {
486 n = node(l, TCLASS, n, nil);
492 return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
496 buildclass(Parselex *l)
503 n = node(l, TCLASS, nil, nil);
507 for(p = l->cpairs; *p != 0; p += 2) {
508 n = node(l, TCLASS, n, nil);
517 prtree(Renode *tree, int d, int f)
524 for(i = 0; i < d; i++)
528 prtree(tree->left, d, 0);
529 prtree(tree->right, d, 1);
533 prtree(tree->left, d+1, 1);
534 for(i = 0; i < d; i++)
537 prtree(tree->right, d+1, 1);
541 prtree(tree->left, d+1, 1);
545 prtree(tree->left, d+1, 1);
549 prtree(tree->left, d+1, 1);
553 prtree(tree->left, d+1, 1);
563 prtree(tree->left, d+1, 1);
566 print("TRUNE: %C\n", tree->r);
569 print("TNOTNL: !\\n\n");
572 print("CLASS: %C-%C\n", tree->r, tree->r1);
573 prtree(tree->left, d, 1);