22 node(Parselex *plex, int op, Renode *l, Renode *r)
26 plex->instrs += instrcnt[op];
42 return node(plex, TANY, nil, nil);
44 return node(plex, TBOL, nil, nil);
46 return node(plex, TEOL, nil, nil);
48 n = node(plex, TRUNE, nil, nil);
53 return buildclassn(plex);
54 return buildclass(plex);
57 n = node(plex, TSUB, n, nil);
58 if(lex(plex) != LRPAR) {
59 snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
61 longjmp(plex->exitenv, 1);
66 snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
68 snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
70 longjmp(plex->exitenv, 1);
81 if(lex(plex) == LREP) {
84 return node(plex, TSTAR, n, nil);
86 return node(plex, TPLUS, n, nil);
88 return node(plex, TQUES, n, nil);
102 while(n->left->op == TCAT) {
120 if(sym == LEND || sym == LOR || sym == LRPAR)
123 n = node(plex, TCAT, n, e2(plex));
138 n = node(plex, TOR, n, e1(plex));
145 initplex(Parselex *plex, char *regstr, int lit)
147 plex->getnextr = lit ? getnextrlit : getnextr;
148 plex->rawexp = plex->orig = regstr;
157 maxthreads(Renode *tree)
168 regcomp1(char *regstr, int nl, int lit)
173 int regstrlen, maxthr;
175 regstrlen = utflen(regstr);
176 initplex(&plex, regstr, lit);
177 plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
178 if(plex.nodes == nil)
180 plex.next = plex.nodes;
182 if(setjmp(plex.exitenv) != 0) {
187 parsetr = node(&plex, TSUB, e0(&plex), nil);
188 maxthr = maxthreads(parsetr);
192 prtree(parsetr, 0, 1);
193 reprog = malloc(sizeof(Reprog) +
194 sizeof(Reinst) * plex.instrs +
195 sizeof(Rethread) * maxthr +
196 sizeof(Rethread*) * maxthr);
197 reprog->len = plex.instrs;
198 reprog->nthr = maxthr;
199 reprog->startinst = compile(parsetr, reprog, nl);
200 reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
201 reprog->thrpool = (Rethread**)(reprog->threads + reprog->nthr);
202 reprog->regstr = regstr;
211 return regcomp1(str, 0, 0);
215 regcomplit(char *str)
217 return regcomp1(str, 0, 1);
223 return regcomp1(str, 1, 0);
227 compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
238 reinst->r = renode->r;
239 reinst->r1 = renode->r1;
240 reinst->a = reinst + 1 + renode->nclass;
241 renode = renode->left;
245 reinst = compile1(renode->left, reinst, sub, nl);
246 renode = renode->right;
250 reinst->a = reinst + 1;
251 i = compile1(renode->left, reinst->a, sub, nl);
254 i->a = compile1(renode->right, reinst->b, sub, nl);
258 reinst->a = reinst + 1;
259 i = compile1(renode->left, reinst->a, sub, nl);
266 reinst = compile1(renode->left, reinst, sub, nl);
269 reinst->b = reinst + 1;
273 reinst->a = reinst + 1;
274 reinst->b = compile1(renode->left, reinst->a, sub, nl);
278 reinst->sub = s = (*sub)++;
279 reinst = compile1(renode->left, reinst+1, sub, nl);
280 reinst->op = OUNSAVE;
285 reinst++->op = ONOTNL;
290 reinst->r = renode->r;
306 compile(Renode *parsetr, Reprog *reprog, int nl)
312 reinst = (Reinst*)(reprog+1);
313 compile1(parsetr, reinst, &sub, nl);
318 getnextr(Parselex *l)
325 l->rawexp += chartorune(&l->rune, l->rawexp);
326 if(l->rune == L'\\') {
327 l->rawexp += chartorune(&l->rune, l->rawexp);
336 getnextrlit(Parselex *l)
344 l->rawexp += chartorune(&l->rune, l->rawexp);
359 return l->peeklex = LRUNE;
362 return l->peeklex = LEND;
366 return l->peeklex = LREP;
368 return l->peeklex = LOR;
370 return l->peeklex = LANY;
372 return l->peeklex = LLPAR;
374 return l->peeklex = LRPAR;
376 return l->peeklex = LBOL;
378 return l->peeklex = LEOL;
381 return l->peeklex = LCLASS;
383 return l->peeklex = LRUNE;
387 pcmp(void *va, void *vb)
395 n = (vlong)b[0] - (vlong)a[0];
398 return (vlong)b[1] - (vlong)a[1];
402 getclass(Parselex *l)
408 if(l->rune == L'^') {
417 if(l->rune == L'-') {
418 regerror("Malformed class");
419 longjmp(l->exitenv, 1);
421 if(l->rune == '\\') {
426 regerror("No closing ] for class");
427 longjmp(l->exitenv, 1);
430 if(l->rune == L'-') {
432 if(l->rune == L']') {
433 regerror("Malformed class");
434 longjmp(l->exitenv, 1);
436 if(l->rune == L'-') {
437 regerror("Malformed class");
438 longjmp(l->exitenv, 1);
451 if(p >= l->cpairs + nelem(l->cpairs) - 2) {
452 regerror("Class too big\n");
453 longjmp(l->exitenv, 1);
458 qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
460 for(p = l->cpairs+2; *p != 0; p += 2) {
461 if(p[1] < q[0] - 1) {
474 /* classes are in descending order */
476 buildclassn(Parselex *l)
484 n = node(l, TCLASS, nil, nil);
489 for(; *p != 0; p += 2) {
490 n = node(l, TCLASS, n, nil);
496 return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
500 buildclass(Parselex *l)
507 n = node(l, TCLASS, nil, nil);
511 for(p = l->cpairs; *p != 0; p += 2) {
512 n = node(l, TCLASS, n, nil);
521 prtree(Renode *tree, int d, int f)
528 for(i = 0; i < d; i++)
532 prtree(tree->left, d, 0);
533 prtree(tree->right, d, 1);
537 prtree(tree->left, d+1, 1);
538 for(i = 0; i < d; i++)
541 prtree(tree->right, d+1, 1);
545 prtree(tree->left, d+1, 1);
549 prtree(tree->left, d+1, 1);
553 prtree(tree->left, d+1, 1);
557 prtree(tree->left, d+1, 1);
567 prtree(tree->left, d+1, 1);
570 print("TRUNE: %C\n", tree->r);
573 print("TNOTNL: !\\n\n");
576 print("CLASS: %C-%C\n", tree->r, tree->r1);
577 prtree(tree->left, d, 1);