22 node(Parselex *plex, int op, Renode *l, Renode *r)
26 plex->instrs += instrcnt[op];
42 return node(plex, TANY, nil, nil);
44 return node(plex, TBOL, nil, nil);
46 return node(plex, TEOL, nil, nil);
48 n = node(plex, TRUNE, nil, nil);
53 return buildclassn(plex);
54 return buildclass(plex);
57 n = node(plex, TSUB, n, nil);
58 if(lex(plex) != LRPAR) {
59 snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
61 longjmp(plex->exitenv, 1);
66 snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
68 snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
70 longjmp(plex->exitenv, 1);
81 while(lex(plex) == LREP) {
84 n = node(plex, TSTAR, n, nil);
87 n = node(plex, TPLUS, n, nil);
90 n = node(plex, TQUES, n, nil);
105 while(n->left->op == TCAT) {
123 if(sym == LEND || sym == LOR || sym == LRPAR)
126 n = node(plex, TCAT, n, e2(plex));
141 n = node(plex, TOR, n, e1(plex));
148 initplex(Parselex *plex, char *regstr, int lit)
150 plex->getnextr = lit ? getnextrlit : getnextr;
151 plex->rawexp = plex->orig = regstr;
160 maxthreads(Renode *tree)
171 regcomp1(char *regstr, int nl, int lit)
176 int regstrlen, maxthr;
178 regstrlen = utflen(regstr);
179 initplex(&plex, regstr, lit);
180 plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
181 if(plex.nodes == nil)
183 plex.next = plex.nodes;
185 if(setjmp(plex.exitenv) != 0) {
191 parsetr = node(&plex, TSUB, e0(&plex), nil);
193 // prtree(parsetr, 0, 1);
194 reprog = malloc(sizeof(Reprog) +
195 sizeof(Reinst) * plex.instrs +
196 sizeof(Rethread) * maxthr);
197 reprog->len = plex.instrs;
198 reprog->nthr = maxthr;
199 reprog->startinst = compile(parsetr, reprog, nl);
200 reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
201 reprog->regstr = regstr;
210 return regcomp1(str, 0, 0);
214 regcomplit(char *str)
216 return regcomp1(str, 0, 1);
222 return regcomp1(str, 1, 0);
226 compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
237 reinst->r = renode->r;
238 reinst->r1 = renode->r1;
239 reinst->a = reinst + 1 + renode->nclass;
240 renode = renode->left;
244 reinst = compile1(renode->left, reinst, sub, nl);
245 renode = renode->right;
249 reinst->a = reinst + 1;
250 i = compile1(renode->left, reinst->a, sub, nl);
253 i->a = compile1(renode->right, reinst->b, sub, nl);
257 reinst->a = reinst + 1;
258 i = compile1(renode->left, reinst->a, sub, nl);
265 reinst = compile1(renode->left, reinst, sub, nl);
268 reinst->b = reinst + 1;
272 reinst->a = reinst + 1;
273 reinst->b = compile1(renode->left, reinst->a, sub, nl);
277 reinst->sub = s = (*sub)++;
278 reinst = compile1(renode->left, reinst+1, sub, nl);
279 reinst->op = OUNSAVE;
284 reinst++->op = ONOTNL;
289 reinst->r = renode->r;
305 compile(Renode *parsetr, Reprog *reprog, int nl)
311 reinst = (Reinst*)(reprog+1);
312 compile1(parsetr, reinst, &sub, nl);
317 getnextr(Parselex *l)
324 l->rawexp += chartorune(&l->rune, l->rawexp);
325 if(l->rune == L'\\') {
326 l->rawexp += chartorune(&l->rune, l->rawexp);
335 getnextrlit(Parselex *l)
343 l->rawexp += chartorune(&l->rune, l->rawexp);
358 return l->peeklex = LRUNE;
361 return l->peeklex = LEND;
365 return l->peeklex = LREP;
367 return l->peeklex = LOR;
369 return l->peeklex = LANY;
371 return l->peeklex = LLPAR;
373 return l->peeklex = LRPAR;
375 return l->peeklex = LBOL;
377 return l->peeklex = LEOL;
380 return l->peeklex = LCLASS;
382 return l->peeklex = LRUNE;
386 pcmp(void *va, void *vb)
394 n = (vlong)b[0] - (vlong)a[0];
397 return (vlong)b[1] - (vlong)a[1];
401 getclass(Parselex *l)
407 if(l->rune == L'^') {
416 if(l->rune == L'-') {
417 regerror("Malformed class");
418 longjmp(l->exitenv, 1);
420 if(l->rune == '\\') {
425 regerror("No closing ] for class");
426 longjmp(l->exitenv, 1);
429 if(l->rune == L'-') {
431 if(l->rune == L']') {
432 regerror("Malformed class");
433 longjmp(l->exitenv, 1);
435 if(l->rune == L'-') {
436 regerror("Malformed class");
437 longjmp(l->exitenv, 1);
450 if(p >= l->cpairs + nelem(l->cpairs) - 2) {
451 regerror("Class too big\n");
452 longjmp(l->exitenv, 1);
457 qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
459 for(p = l->cpairs+2; *p != 0; p += 2) {
460 if(p[1] < q[0] - 1) {
473 /* classes are in descending order */
475 buildclassn(Parselex *l)
483 n = node(l, TCLASS, nil, nil);
488 for(; *p != 0; p += 2) {
489 n = node(l, TCLASS, n, nil);
495 return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
499 buildclass(Parselex *l)
506 n = node(l, TCLASS, nil, nil);
510 for(p = l->cpairs; *p != 0; p += 2) {
511 n = node(l, TCLASS, n, nil);
520 prtree(Renode *tree, int d, int f)
527 for(i = 0; i < d; i++)
531 prtree(tree->left, d, 0);
532 prtree(tree->right, d, 1);
536 prtree(tree->left, d+1, 1);
537 for(i = 0; i < d; i++)
540 prtree(tree->right, d+1, 1);
544 prtree(tree->left, d+1, 1);
548 prtree(tree->left, d+1, 1);
552 prtree(tree->left, d+1, 1);
556 prtree(tree->left, d+1, 1);
566 prtree(tree->left, d+1, 1);
569 print("TRUNE: %C\n", tree->r);
572 print("TNOTNL: !\\n\n");
575 print("CLASS: %C-%C\n", tree->r, tree->r1);
576 prtree(tree->left, d, 1);