22 node(Parselex *plex, int op, Renode *l, Renode *r)
26 plex->instrs += instrcnt[op];
42 return node(plex, TANY, nil, nil);
44 return node(plex, TBOL, nil, nil);
46 return node(plex, TEOL, nil, nil);
48 n = node(plex, TRUNE, nil, nil);
53 return buildclassn(plex);
54 return buildclass(plex);
57 n = node(plex, TSUB, n, nil);
58 if(lex(plex) != LRPAR) {
59 snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
61 longjmp(plex->exitenv, 1);
66 snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
68 snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
70 longjmp(plex->exitenv, 1);
81 while(lex(plex) == LREP) {
84 n = node(plex, TSTAR, n, nil);
87 n = node(plex, TPLUS, n, nil);
90 n = node(plex, TQUES, n, nil);
105 while(n->left->op == TCAT) {
123 if(sym == LEND || sym == LOR || sym == LRPAR)
126 n = node(plex, TCAT, n, e2(plex));
141 n = node(plex, TOR, n, e1(plex));
148 initplex(Parselex *plex, char *regstr, int lit)
150 plex->getnextr = lit ? getnextrlit : getnextr;
151 plex->rawexp = plex->orig = regstr;
160 regcomp1(char *regstr, int nl, int lit)
165 int regstrlen, maxthr;
167 regstrlen = utflen(regstr);
168 initplex(&plex, regstr, lit);
169 plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
170 if(plex.nodes == nil)
172 plex.next = plex.nodes;
174 if(setjmp(plex.exitenv) != 0) {
179 maxthr = regstrlen + 1;
180 parsetr = node(&plex, TSUB, e0(&plex), nil);
182 // prtree(parsetr, 0, 1);
183 reprog = malloc(sizeof(Reprog) +
184 sizeof(Reinst) * plex.instrs +
185 sizeof(Rethread) * maxthr);
186 reprog->len = plex.instrs;
187 reprog->nthr = maxthr;
188 reprog->startinst = compile(parsetr, reprog, nl);
189 reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
190 reprog->regstr = regstr;
199 return regcomp1(str, 0, 0);
203 regcomplit(char *str)
205 return regcomp1(str, 0, 1);
211 return regcomp1(str, 1, 0);
215 compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
226 reinst->r = renode->r;
227 reinst->r1 = renode->r1;
228 reinst->a = reinst + 1 + renode->nclass;
229 renode = renode->left;
233 reinst = compile1(renode->left, reinst, sub, nl);
234 renode = renode->right;
238 reinst->a = reinst + 1;
239 i = compile1(renode->left, reinst->a, sub, nl);
242 i->a = compile1(renode->right, reinst->b, sub, nl);
246 reinst->a = reinst + 1;
247 i = compile1(renode->left, reinst->a, sub, nl);
254 reinst = compile1(renode->left, reinst, sub, nl);
257 reinst->b = reinst + 1;
261 reinst->a = reinst + 1;
262 reinst->b = compile1(renode->left, reinst->a, sub, nl);
266 reinst->sub = s = (*sub)++;
267 reinst = compile1(renode->left, reinst+1, sub, nl);
268 reinst->op = OUNSAVE;
273 reinst++->op = ONOTNL;
278 reinst->r = renode->r;
294 compile(Renode *parsetr, Reprog *reprog, int nl)
296 Reinst *reinst, *end;
300 reinst = (Reinst*)(reprog+1);
301 end = compile1(parsetr, reinst, &sub, nl);
302 assert(reinst + reprog->len == end);
307 getnextr(Parselex *l)
314 l->rawexp += chartorune(&l->rune, l->rawexp);
315 if(l->rune == L'\\') {
316 l->rawexp += chartorune(&l->rune, l->rawexp);
325 getnextrlit(Parselex *l)
333 l->rawexp += chartorune(&l->rune, l->rawexp);
348 return l->peeklex = LRUNE;
351 return l->peeklex = LEND;
355 return l->peeklex = LREP;
357 return l->peeklex = LOR;
359 return l->peeklex = LANY;
361 return l->peeklex = LLPAR;
363 return l->peeklex = LRPAR;
365 return l->peeklex = LBOL;
367 return l->peeklex = LEOL;
370 return l->peeklex = LCLASS;
372 return l->peeklex = LRUNE;
376 pcmp(void *va, void *vb)
384 n = (vlong)b[0] - (vlong)a[0];
387 return (vlong)b[1] - (vlong)a[1];
391 getclass(Parselex *l)
397 if(l->rune == L'^') {
406 if(l->rune == L'-') {
407 regerror("Malformed class");
408 longjmp(l->exitenv, 1);
410 if(l->rune == '\\') {
415 regerror("No closing ] for class");
416 longjmp(l->exitenv, 1);
419 if(l->rune == L'-') {
421 if(l->rune == L']') {
422 regerror("Malformed class");
423 longjmp(l->exitenv, 1);
425 if(l->rune == L'-') {
426 regerror("Malformed class");
427 longjmp(l->exitenv, 1);
440 if(p >= l->cpairs + nelem(l->cpairs) - 2) {
441 regerror("Class too big\n");
442 longjmp(l->exitenv, 1);
447 qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
449 for(p = l->cpairs+2; *p != 0; p += 2) {
450 if(p[1] < q[0] - 1) {
463 /* classes are in descending order */
465 buildclassn(Parselex *l)
473 n = node(l, TCLASS, nil, nil);
478 for(; *p != 0; p += 2) {
479 n = node(l, TCLASS, n, nil);
485 return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
489 buildclass(Parselex *l)
496 n = node(l, TCLASS, nil, nil);
500 for(p = l->cpairs; *p != 0; p += 2) {
501 n = node(l, TCLASS, n, nil);
510 prtree(Renode *tree, int d, int f)
517 for(i = 0; i < d; i++)
521 prtree(tree->left, d, 0);
522 prtree(tree->right, d, 1);
526 prtree(tree->left, d+1, 1);
527 for(i = 0; i < d; i++)
530 prtree(tree->right, d+1, 1);
534 prtree(tree->left, d+1, 1);
538 prtree(tree->left, d+1, 1);
542 prtree(tree->left, d+1, 1);
546 prtree(tree->left, d+1, 1);
556 prtree(tree->left, d+1, 1);
559 print("TRUNE: %C\n", tree->r);
562 print("TNOTNL: !\\n\n");
565 print("CLASS: %C-%C\n", tree->r, tree->r1);
566 prtree(tree->left, d, 1);