22 node(Parselex *plex, int op, Renode *l, Renode *r)
26 plex->instrs += instrcnt[op];
42 return node(plex, TANY, nil, nil);
44 return node(plex, TBOL, nil, nil);
46 return node(plex, TEOL, nil, nil);
48 n = node(plex, TRUNE, nil, nil);
53 return buildclassn(plex);
54 return buildclass(plex);
57 n = node(plex, TSUB, n, nil);
58 if(lex(plex) != LRPAR) {
59 snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
61 longjmp(plex->exitenv, 1);
66 snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
68 snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
70 longjmp(plex->exitenv, 1);
81 if(lex(plex) == LREP) {
84 return node(plex, TSTAR, n, nil);
86 return node(plex, TPLUS, n, nil);
88 return node(plex, TQUES, n, nil);
102 while(n->left->op == TCAT) {
120 if(sym == LEND || sym == LOR || sym == LRPAR)
123 n = node(plex, TCAT, n, e2(plex));
138 n = node(plex, TOR, n, e1(plex));
145 initplex(Parselex *plex, char *regstr, int lit)
147 plex->getnextr = lit ? getnextrlit : getnextr;
148 plex->rawexp = plex->orig = regstr;
157 maxthreads(Renode *tree)
168 regcomp1(char *regstr, int nl, int lit)
173 int regstrlen, maxthr;
175 regstrlen = utflen(regstr);
176 initplex(&plex, regstr, lit);
177 plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
178 if(plex.nodes == nil)
180 plex.next = plex.nodes;
182 if(setjmp(plex.exitenv) != 0) {
188 parsetr = node(&plex, TSUB, e0(&plex), nil);
190 // prtree(parsetr, 0, 1);
191 // fprint(2, "Program size %ld\n", sizeof(Reprog) + sizeof(Reinst) * plex.instrs + sizeof(Rethread) * maxthr);
192 reprog = malloc(sizeof(Reprog) +
193 sizeof(Reinst) * plex.instrs +
194 sizeof(Rethread) * maxthr);
195 reprog->len = plex.instrs;
196 reprog->nthr = maxthr;
197 reprog->startinst = compile(parsetr, reprog, nl);
198 reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
199 reprog->regstr = regstr;
208 return regcomp1(str, 0, 0);
212 regcomplit(char *str)
214 return regcomp1(str, 0, 1);
220 return regcomp1(str, 1, 0);
224 compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
235 reinst->r = renode->r;
236 reinst->r1 = renode->r1;
237 reinst->a = reinst + 1 + renode->nclass;
238 renode = renode->left;
242 reinst = compile1(renode->left, reinst, sub, nl);
243 renode = renode->right;
247 reinst->a = reinst + 1;
248 i = compile1(renode->left, reinst->a, sub, nl);
251 i->a = compile1(renode->right, reinst->b, sub, nl);
255 reinst->a = reinst + 1;
256 i = compile1(renode->left, reinst->a, sub, nl);
263 reinst = compile1(renode->left, reinst, sub, nl);
266 reinst->b = reinst + 1;
270 reinst->a = reinst + 1;
271 reinst->b = compile1(renode->left, reinst->a, sub, nl);
275 reinst->sub = s = (*sub)++;
276 reinst = compile1(renode->left, reinst+1, sub, nl);
277 reinst->op = OUNSAVE;
282 reinst++->op = ONOTNL;
287 reinst->r = renode->r;
303 compile(Renode *parsetr, Reprog *reprog, int nl)
309 reinst = (Reinst*)(reprog+1);
310 compile1(parsetr, reinst, &sub, nl);
315 getnextr(Parselex *l)
322 l->rawexp += chartorune(&l->rune, l->rawexp);
323 if(l->rune == L'\\') {
324 l->rawexp += chartorune(&l->rune, l->rawexp);
333 getnextrlit(Parselex *l)
341 l->rawexp += chartorune(&l->rune, l->rawexp);
356 return l->peeklex = LRUNE;
359 return l->peeklex = LEND;
363 return l->peeklex = LREP;
365 return l->peeklex = LOR;
367 return l->peeklex = LANY;
369 return l->peeklex = LLPAR;
371 return l->peeklex = LRPAR;
373 return l->peeklex = LBOL;
375 return l->peeklex = LEOL;
378 return l->peeklex = LCLASS;
380 return l->peeklex = LRUNE;
384 pcmp(void *va, void *vb)
392 n = (vlong)b[0] - (vlong)a[0];
395 return (vlong)b[1] - (vlong)a[1];
399 getclass(Parselex *l)
405 if(l->rune == L'^') {
414 if(l->rune == L'-') {
415 regerror("Malformed class");
416 longjmp(l->exitenv, 1);
418 if(l->rune == '\\') {
423 regerror("No closing ] for class");
424 longjmp(l->exitenv, 1);
427 if(l->rune == L'-') {
429 if(l->rune == L']') {
430 regerror("Malformed class");
431 longjmp(l->exitenv, 1);
433 if(l->rune == L'-') {
434 regerror("Malformed class");
435 longjmp(l->exitenv, 1);
448 if(p >= l->cpairs + nelem(l->cpairs) - 2) {
449 regerror("Class too big\n");
450 longjmp(l->exitenv, 1);
455 qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
457 for(p = l->cpairs+2; *p != 0; p += 2) {
458 if(p[1] < q[0] - 1) {
471 /* classes are in descending order */
473 buildclassn(Parselex *l)
481 n = node(l, TCLASS, nil, nil);
486 for(; *p != 0; p += 2) {
487 n = node(l, TCLASS, n, nil);
493 return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
497 buildclass(Parselex *l)
504 n = node(l, TCLASS, nil, nil);
508 for(p = l->cpairs; *p != 0; p += 2) {
509 n = node(l, TCLASS, n, nil);
518 prtree(Renode *tree, int d, int f)
525 for(i = 0; i < d; i++)
529 prtree(tree->left, d, 0);
530 prtree(tree->right, d, 1);
534 prtree(tree->left, d+1, 1);
535 for(i = 0; i < d; i++)
538 prtree(tree->right, d+1, 1);
542 prtree(tree->left, d+1, 1);
546 prtree(tree->left, d+1, 1);
550 prtree(tree->left, d+1, 1);
554 prtree(tree->left, d+1, 1);
564 prtree(tree->left, d+1, 1);
567 print("TRUNE: %C\n", tree->r);
570 print("TNOTNL: !\\n\n");
573 print("CLASS: %C-%C\n", tree->r, tree->r1);
574 prtree(tree->left, d, 1);