From: cinap_lenrek Date: Thu, 28 Apr 2016 18:53:53 +0000 (+0200) Subject: merge X-Git-Url: https://git.lizzy.rs/?a=commitdiff_plain;h=4c21da2ededfb5fc628dbee0d75b0aa5263ef97b;hp=ca5b491753ad363da8d9deb5978b56231ec27bf5;p=plan9front.git merge --- diff --git a/sys/doc/prfile b/sys/doc/prfile old mode 100644 new mode 100755 diff --git a/sys/include/regexp.h b/sys/include/regexp.h index a574674ac..770942801 100644 --- a/sys/include/regexp.h +++ b/sys/include/regexp.h @@ -1,15 +1,29 @@ -#pragma src "/sys/src/libregexp" -#pragma lib "libregexp.a" +#pragma src "/sys/src/libregexp" +#pragma lib "libregexp.a" +enum +{ + OANY = 0, + OBOL, + OCLASS, + OEOL, + OJMP, + ONOTNL, + ORUNE, + OSAVE, + OSPLIT, + OUNSAVE, +}; + +typedef struct Resub Resub; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; +typedef struct Rethread Rethread; -typedef struct Resub Resub; -typedef struct Reclass Reclass; -typedef struct Reinst Reinst; -typedef struct Reprog Reprog; +#pragma incomplete Reinst +#pragma incomplete Rethread -/* - * Sub expression matches - */ -struct Resub{ +struct Resub +{ union { char *sp; @@ -21,46 +35,21 @@ struct Resub{ Rune *rep; }; }; - -/* - * character class, each pair of rune's defines a range - */ -struct Reclass{ - Rune *end; - Rune spans[64]; -}; - -/* - * Machine instructions - */ -struct Reinst{ - int type; - union { - Reclass *cp; /* class pointer */ - Rune r; /* character */ - int subid; /* sub-expression id for RBRA and LBRA */ - Reinst *right; /* right child of OR */ - }; - union { /* regexp relies on these two being in the same union */ - Reinst *left; /* left child of OR */ - Reinst *next; /* next instruction for CAT & LBRA */ - }; -}; - -/* - * Reprogram definition - */ -struct Reprog{ - Reinst *startinst; /* start pc */ - Reclass class[16]; /* .data */ - Reinst firstinst[5]; /* .text */ +struct Reprog +{ + Reinst *startinst; + Rethread *threads; + char *regstr; + int len; + int nthr; }; -extern Reprog *regcomp(char*); -extern Reprog *regcomplit(char*); -extern Reprog *regcompnl(char*); -extern void regerror(char*); -extern int regexec(Reprog*, char*, Resub*, int); -extern void regsub(char*, char*, int, Resub*, int); -extern int rregexec(Reprog*, Rune*, Resub*, int); -extern void rregsub(Rune*, Rune*, int, Resub*, int); +Reprog* regcomp(char*); +Reprog* regcomplit(char*); +Reprog* regcompnl(char*); +void regerror(char*); +int regexec(Reprog*, char*, Resub*, int); +void regsub(char*, char*, int, Resub*, int); +int rregexec(Reprog*, Rune*, Resub*, int); +void rregsub(Rune*, Rune*, int, Resub*, int); +int reprogfmt(Fmt *); diff --git a/sys/src/ape/lib/mkfile b/sys/src/ape/lib/mkfile index 3762294d3..6c24c3674 100644 --- a/sys/src/ape/lib/mkfile +++ b/sys/src/ape/lib/mkfile @@ -1,6 +1,6 @@ -#include -#include "regexp.h" -#include "regcomp.h" - -/* - * Machine state - */ -Relist* _relist[2]; -Relist* _reliste[2]; -int _relistsize = LISTINCREMENT; - -/* - * save a new match in mp - */ -extern void -_renewmatch(Resub *mp, int ms, Resublist *sp) -{ - int i; - - if(mp==0 || ms<=0) - return; - if(mp[0].s.sp==0 || sp->m[0].s.spm[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ - for(i=0; im[i]; - for(; iinst; p++){ - if(p->inst == ip){ - if((sep)->m[0].s.sp < p->se.m[0].s.sp) - p->se = *sep; - return 0; - } - } - p->inst = ip; - p->se = *sep; - (++p)->inst = 0; - return p; -} - diff --git a/sys/src/ape/lib/regexp/regcomp.c b/sys/src/ape/lib/regexp/regcomp.c deleted file mode 100644 index 5d31fc2f6..000000000 --- a/sys/src/ape/lib/regexp/regcomp.c +++ /dev/null @@ -1,560 +0,0 @@ -#include -#include -#include -#include -#include "regexp.h" -#include "regcomp.h" - -#define TRUE 1 -#define FALSE 0 - -/* - * Parser Information - */ -typedef -struct Node -{ - Reinst* first; - Reinst* last; -}Node; - -#define NSTACK 20 -static Node andstack[NSTACK]; -static Node *andp; -static int atorstack[NSTACK]; -static int* atorp; -static int cursubid; /* id of current subexpression */ -static int subidstack[NSTACK]; /* parallel to atorstack */ -static int* subidp; -static int lastwasand; /* Last token was operand */ -static int nbra; -static char* exprp; /* pointer to next character in source expression */ -static int lexdone; -static int nclass; -static Reclass*classp; -static Reinst* freep; -static int errors; -static wchar_t yyrune; /* last lex'd rune */ -static Reclass*yyclassp; /* last lex'd class */ - -/* predeclared crap */ -static void operator(int); -static void pushand(Reinst*, Reinst*); -static void pushator(int); -static void evaluntil(int); -static int bldcclass(void); - -static jmp_buf regkaboom; - -static void -rcerror(char *s) -{ - errors++; - regerror(s); - longjmp(regkaboom, 1); -} - -static Reinst* -newinst(int t) -{ - freep->type = t; - freep->l.left = 0; - freep->r.right = 0; - return freep++; -} - -static void -operand(int t) -{ - Reinst *i; - - if(lastwasand) - operator(CAT); /* catenate is implicit */ - i = newinst(t); - - if(t == CCLASS || t == NCCLASS) - i->r.cp = yyclassp; - if(t == RUNE) - i->r.r = yyrune; - - pushand(i, i); - lastwasand = TRUE; -} - -static void -operator(int t) -{ - if(t==RBRA && --nbra<0) - rcerror("unmatched right paren"); - if(t==LBRA){ - if(++cursubid >= NSUBEXP) - rcerror ("too many subexpressions"); - nbra++; - if(lastwasand) - operator(CAT); - } else - evaluntil(t); - if(t != RBRA) - pushator(t); - lastwasand = FALSE; - if(t==STAR || t==QUEST || t==PLUS || t==RBRA) - lastwasand = TRUE; /* these look like operands */ -} - -static void -regerr2(char *s, int c) -{ - char buf[100]; - char *cp = buf; - while(*s) - *cp++ = *s++; - *cp++ = c; - *cp = '\0'; - rcerror(buf); -} - -static void -cant(char *s) -{ - char buf[100]; - strcpy(buf, "can't happen: "); - strcat(buf, s); - rcerror(buf); -} - -static void -pushand(Reinst *f, Reinst *l) -{ - if(andp >= &andstack[NSTACK]) - cant("operand stack overflow"); - andp->first = f; - andp->last = l; - andp++; -} - -static void -pushator(int t) -{ - if(atorp >= &atorstack[NSTACK]) - cant("operator stack overflow"); - *atorp++ = t; - *subidp++ = cursubid; -} - -static Node* -popand(int op) -{ - Reinst *inst; - - if(andp <= &andstack[0]){ - regerr2("missing operand for ", op); - inst = newinst(NOP); - pushand(inst,inst); - } - return --andp; -} - -static int -popator(void) -{ - if(atorp <= &atorstack[0]) - cant("operator stack underflow"); - --subidp; - return *--atorp; -} - -static void -evaluntil(int pri) -{ - Node *op1, *op2; - Reinst *inst1, *inst2; - - while(pri==RBRA || atorp[-1]>=pri){ - switch(popator()){ - default: - rcerror("unknown operator in evaluntil"); - break; - case LBRA: /* must have been RBRA */ - op1 = popand('('); - inst2 = newinst(RBRA); - inst2->r.subid = *subidp; - op1->last->l.next = inst2; - inst1 = newinst(LBRA); - inst1->r.subid = *subidp; - inst1->l.next = op1->first; - pushand(inst1, inst2); - return; - case OR: - op2 = popand('|'); - op1 = popand('|'); - inst2 = newinst(NOP); - op2->last->l.next = inst2; - op1->last->l.next = inst2; - inst1 = newinst(OR); - inst1->r.right = op1->first; - inst1->l.left = op2->first; - pushand(inst1, inst2); - break; - case CAT: - op2 = popand(0); - op1 = popand(0); - op1->last->l.next = op2->first; - pushand(op1->first, op2->last); - break; - case STAR: - op2 = popand('*'); - inst1 = newinst(OR); - op2->last->l.next = inst1; - inst1->r.right = op2->first; - pushand(inst1, inst1); - break; - case PLUS: - op2 = popand('+'); - inst1 = newinst(OR); - op2->last->l.next = inst1; - inst1->r.right = op2->first; - pushand(op2->first, inst1); - break; - case QUEST: - op2 = popand('?'); - inst1 = newinst(OR); - inst2 = newinst(NOP); - inst1->l.left = inst2; - inst1->r.right = op2->first; - op2->last->l.next = inst2; - pushand(inst1, inst2); - break; - } - } -} - -static Reprog* -optimize(Reprog *pp) -{ - Reinst *inst, *target; - int size; - Reprog *npp; - int diff; - - /* - * get rid of NOOP chains - */ - for(inst=pp->firstinst; inst->type!=END; inst++){ - target = inst->l.next; - while(target->type == NOP) - target = target->l.next; - inst->l.next = target; - } - - /* - * The original allocation is for an area larger than - * necessary. Reallocate to the actual space used - * and then relocate the code. - */ - size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); - npp = realloc(pp, size); - if(npp==0 || npp==pp) - return pp; - diff = (char *)npp - (char *)pp; - freep = (Reinst *)((char *)freep + diff); - for(inst=npp->firstinst; insttype){ - case OR: - case STAR: - case PLUS: - case QUEST: - case CCLASS: - case NCCLASS: - *(char **)&inst->r.right += diff; - break; - } - *(char **)&inst->l.left += diff; - } - *(char **)&npp->startinst += diff; - return npp; -} - -#ifdef DEBUG -static void -dumpstack(void){ - Node *stk; - int *ip; - - print("operators\n"); - for(ip=atorstack; ipfirst->type, stk->last->type); -} - -static void -dump(Reprog *pp) -{ - Reinst *l; - wchar_t *p; - - l = pp->firstinst; - do{ - print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, - l->l.left-pp->firstinst, l->l.right-pp->firstinst); - if(l->type == RUNE) - print("\t%C\n", l->r); - else if(l->type == CCLASS || l->type == NCCLASS){ - print("\t["); - if(l->type == NCCLASS) - print("^"); - for(p = l->r.cp->spans; p < l->r.cp->end; p += 2) - if(p[0] == p[1]) - print("%C", p[0]); - else - print("%C-%C", p[0], p[1]); - print("]\n"); - } else - print("\n"); - }while(l++->type); -} -#endif - -static Reclass* -newclass(void) -{ - if(nclass >= NCLASS) - regerr2("too many character classes; limit", NCLASS+'0'); - return &(classp[nclass++]); -} - -static int -nextc(wchar_t *rp) -{ - int n; - - if(lexdone){ - *rp = 0; - return 1; - } - n = mbtowc(rp, exprp, MB_CUR_MAX); - if (n <= 0) - n = 1; - exprp += n; - if(*rp == L'\\'){ - n = mbtowc(rp, exprp, MB_CUR_MAX); - if (n <= 0) - n = 1; - exprp += n; - return 1; - } - if(*rp == 0) - lexdone = 1; - return 0; -} - -static int -lex(int literal, int dot_type) -{ - int quoted; - - quoted = nextc(&yyrune); - if(literal || quoted){ - if(yyrune == 0) - return END; - return RUNE; - } - - switch(yyrune){ - case 0: - return END; - case L'*': - return STAR; - case L'?': - return QUEST; - case L'+': - return PLUS; - case L'|': - return OR; - case L'.': - return dot_type; - case L'(': - return LBRA; - case L')': - return RBRA; - case L'^': - return BOL; - case L'$': - return EOL; - case L'[': - return bldcclass(); - } - return RUNE; -} - -static int -bldcclass(void) -{ - int type; - wchar_t r[NCCRUNE]; - wchar_t *p, *ep, *np; - wchar_t rune; - int quoted; - - /* we have already seen the '[' */ - type = CCLASS; - yyclassp = newclass(); - - /* look ahead for negation */ - ep = r; - quoted = nextc(&rune); - if(!quoted && rune == L'^'){ - type = NCCLASS; - quoted = nextc(&rune); - *ep++ = L'\n'; - *ep++ = L'\n'; - } - - /* parse class into a set of spans */ - for(; ep<&r[NCCRUNE];){ - if(rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - if(!quoted && rune == L']') - break; - if(!quoted && rune == L'-'){ - if(ep == r){ - rcerror("malformed '[]'"); - return 0; - } - quoted = nextc(&rune); - if((!quoted && rune == L']') || rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - *(ep-1) = rune; - } else { - *ep++ = rune; - *ep++ = rune; - } - quoted = nextc(&rune); - } - - /* sort on span start */ - for(p = r; p < ep; p += 2){ - for(np = p; np < ep; np += 2) - if(*np < *p){ - rune = np[0]; - np[0] = p[0]; - p[0] = rune; - rune = np[1]; - np[1] = p[1]; - p[1] = rune; - } - } - - /* merge spans */ - np = yyclassp->spans; - p = r; - if(r == ep) - yyclassp->end = np; - else { - np[0] = *p++; - np[1] = *p++; - for(; p < ep; p += 2) - if(p[0] <= np[1]){ - if(p[1] > np[1]) - np[1] = p[1]; - } else { - np += 2; - np[0] = p[0]; - np[1] = p[1]; - } - yyclassp->end = np+2; - } - - return type; -} - -static Reprog* -regcomp1(char *s, int literal, int dot_type) -{ - int token; - Reprog *pp; - - /* get memory for the program */ - pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); - if(pp == 0){ - regerror("out of memory"); - return 0; - } - freep = pp->firstinst; - classp = pp->class; - errors = 0; - - if(setjmp(regkaboom)) - goto out; - - /* go compile the sucker */ - lexdone = 0; - exprp = s; - nclass = 0; - nbra = 0; - atorp = atorstack; - andp = andstack; - subidp = subidstack; - lastwasand = FALSE; - cursubid = 0; - - /* Start with a low priority operator to prime parser */ - pushator(START-1); - while((token = lex(literal, dot_type)) != END){ - if((token&0300) == OPERATOR) - operator(token); - else - operand(token); - } - - /* Close with a low priority operator */ - evaluntil(START); - - /* Force END */ - operand(END); - evaluntil(START); -#ifdef DEBUG - dumpstack(); -#endif - if(nbra) - rcerror("unmatched left paren"); - --andp; /* points to first and only operand */ - pp->startinst = andp->first; -#ifdef DEBUG - dump(pp); -#endif - pp = optimize(pp); -#ifdef DEBUG - print("start: %d\n", andp->first-pp->firstinst); - dump(pp); -#endif -out: - if(errors){ - free(pp); - pp = 0; - } - return pp; -} - -extern Reprog* -regcomp(char *s) -{ - return regcomp1(s, 0, ANY); -} - -extern Reprog* -regcomplit(char *s) -{ - return regcomp1(s, 1, ANY); -} - -extern Reprog* -regcompnl(char *s) -{ - return regcomp1(s, 0, ANYNL); -} diff --git a/sys/src/ape/lib/regexp/regcomp.h b/sys/src/ape/lib/regexp/regcomp.h deleted file mode 100644 index 082e6137e..000000000 --- a/sys/src/ape/lib/regexp/regcomp.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * substitution list - */ -typedef struct Resublist Resublist; -struct Resublist -{ - Resub m[32]; -}; - -/* max subexpressions per program */ -Resublist ReSuBlIsT; -#define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub)) - -/* max character classes per program */ -Reprog RePrOg; -#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass)) - -/* max rune ranges per character class */ -#define NCCRUNE (sizeof(Reclass)/sizeof(wchar_t)) - -/* - * Actions and Tokens (Reinst types) - * - * 02xx are operators, value == precedence - * 03xx are tokens, i.e. operands for operators - */ -#define RUNE 0177 -#define OPERATOR 0200 /* Bitmask of all operators */ -#define START 0200 /* Start, used for marker on stack */ -#define RBRA 0201 /* Right bracket, ) */ -#define LBRA 0202 /* Left bracket, ( */ -#define OR 0203 /* Alternation, | */ -#define CAT 0204 /* Concatentation, implicit operator */ -#define STAR 0205 /* Closure, * */ -#define PLUS 0206 /* a+ == aa* */ -#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ -#define ANY 0300 /* Any character except newline, . */ -#define ANYNL 0301 /* Any character including newline, . */ -#define NOP 0302 /* No operation, internal use only */ -#define BOL 0303 /* Beginning of line, ^ */ -#define EOL 0304 /* End of line, $ */ -#define CCLASS 0305 /* Character class, [] */ -#define NCCLASS 0306 /* Negated character class, [] */ -#define END 0377 /* Terminate: match found */ - -/* - * regexec execution lists - */ -#define LISTINCREMENT 8 -typedef struct Relist Relist; -struct Relist -{ - Reinst *inst; /* Reinstruction of the thread */ - Resublist se; /* matched subexpressions in this thread */ -}; -extern Relist* _relist[2]; -extern Relist* _reliste[2]; -extern int _relistsize; - -extern Relist* _renewthread(Relist*, Reinst*, Resublist*); -extern void _renewmatch(Resub*, int, Resublist*); diff --git a/sys/src/ape/lib/regexp/regerror.c b/sys/src/ape/lib/regexp/regerror.c deleted file mode 100644 index bfa158299..000000000 --- a/sys/src/ape/lib/regexp/regerror.c +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include -#include -#include "regexp.h" - -void -regerror(char *s) -{ - char buf[132]; - - strcpy(buf, "regerror: "); - strcat(buf, s); - strcat(buf, "\n"); - fwrite(buf, 1, strlen(buf), stderr); - exit(1); -} diff --git a/sys/src/ape/lib/regexp/regexec.c b/sys/src/ape/lib/regexp/regexec.c deleted file mode 100644 index dee888b09..000000000 --- a/sys/src/ape/lib/regexp/regexec.c +++ /dev/null @@ -1,191 +0,0 @@ -#include -#include -#include "regexp.h" -#include "regcomp.h" - -static Resublist sempty; /* empty set of matches */ - -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -regexec1(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - char *starts, - char *eol, - wchar_t startchar) -{ - int flag=0; - Reinst *inst; - Relist *tlp; - char *s; - int i, checkstart; - wchar_t r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - - match = 0; - checkstart = startchar; - sempty.m[0].s.sp = 0; - if(mp!=0) - for(i=0; iinst = 0; - - /* Add first instruction to current list */ - if(match == 0){ - sempty.m[0].s.sp = s; - _renewthread(tl, progp->startinst, &sempty); - } - - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - if(s == eol) - break; - - for(inst = tlp->inst; ; inst = inst->l.next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r.r == r) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->r.subid].s.sp = s; - continue; - case RBRA: - tlp->se.m[inst->r.subid].e.ep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].e.ep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } - break; - } - } - checkstart = startchar && nl->inst==0; - s += n; - }while(r); - return match; -} - -extern int -regexec(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - char *starts; /* where to start match */ - char *eol; /* where to end match */ - wchar_t startchar; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - starts = bol; - eol = 0; - if(mp && ms>0){ - if(mp->s.sp) - starts = mp->s.sp; - if(mp->e.ep) - eol = mp->e.ep; - } - startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) - ? progp->startinst->r.r : 0; - - /* keep trying till we have enough list space to terminate */ - for(;;){ - if(_relist[0] == 0){ - _relist[0] = malloc(2*_relistsize*sizeof(Relist)); - _relist[1] = _relist[0] + _relistsize; - _reliste[0] = _relist[0] + _relistsize - 1; - _reliste[1] = _relist[1] + _relistsize - 1; - if(_relist[0] == 0) - regerror("_relist overflow"); - } - rv = regexec1(progp, bol, mp, ms, starts, eol, startchar); - if(rv >= 0) - return rv; - free(_relist[0]); - _relist[0] = 0; - _relistsize += LISTINCREMENT; - } -} diff --git a/sys/src/ape/lib/regexp/regsub.c b/sys/src/ape/lib/regexp/regsub.c deleted file mode 100644 index 0c891e9c6..000000000 --- a/sys/src/ape/lib/regexp/regsub.c +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include "regexp.h" - -/* substitute into one string using the matches from the last regexec() */ -extern void -regsub(char *sp, /* source string */ - char *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ -{ - char *ssp, *ep; - int i; - - ep = dp+dlen-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp[i].s.sp != 0 && mp!=0 && ms>i) - for(ssp = mp[i].s.sp; - ssp < mp[i].e.ep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; - } - }else if(*sp == '&'){ - if(mp[0].s.sp != 0 && mp!=0 && ms>0) - if(mp[0].s.sp != 0) - for(ssp = mp[0].s.sp; - ssp < mp[0].e.ep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; - } - sp++; - } - *dp = '\0'; -} diff --git a/sys/src/ape/lib/regexp/rregexec.c b/sys/src/ape/lib/regexp/rregexec.c deleted file mode 100644 index 519ac25ad..000000000 --- a/sys/src/ape/lib/regexp/rregexec.c +++ /dev/null @@ -1,181 +0,0 @@ -#include -#include -#include "regexp.h" -#include "regcomp.h" - -static Resublist sempty; /* empty set of matches */ - -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -rregexec1(Reprog *progp, /* program to run */ - wchar_t *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - wchar_t *starts, - wchar_t *eol, - wchar_t startchar) -{ - int flag=0; - Reinst *inst; - Relist *tlp; - wchar_t *s; - int i, checkstart; - wchar_t r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - - match = 0; - checkstart = startchar; - sempty.m[0].s.rsp = 0; - if(mp!=0) - for(i=0; iinst = 0; - - /* Add first instruction to current list */ - sempty.m[0].s.rsp = s; - _renewthread(tl, progp->startinst, &sempty); - - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - if(s == eol) - break; - - for(inst=tlp->inst; ; inst = inst->l.next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r.r == r) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->r.subid].s.rsp = s; - continue; - case RBRA: - tlp->se.m[inst->r.subid].e.rep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->l.next, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->r.right, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].e.rep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } - break; - } - } - checkstart = startchar && nl->inst==0; - s++; - }while(r); - return match; -} - -extern int -rregexec(Reprog *progp, /* program to run */ - wchar_t *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - wchar_t *starts; /* where to start match */ - wchar_t *eol; /* where to end match */ - wchar_t startchar; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - starts = bol; - eol = 0; - if(mp && ms>0){ - if(mp->s.rsp) - starts = mp->s.rsp; - if(mp->e.rep) - eol = mp->e.rep; - } - startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0; - - /* keep trying till we have enough list space to terminate */ - for(;;){ - if(_relist[0] == 0){ - _relist[0] = malloc(2*_relistsize*sizeof(Relist)); - _relist[1] = _relist[0] + _relistsize; - _reliste[0] = _relist[0] + _relistsize - 1; - _reliste[1] = _relist[1] + _relistsize - 1; - if(_relist[0] == 0) - regerror("_relist overflow"); - } - rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar); - if(rv >= 0) - return rv; - free(_relist[0]); - _relist[0] = 0; - _relistsize += LISTINCREMENT; - } -} diff --git a/sys/src/ape/lib/regexp/rregsub.c b/sys/src/ape/lib/regexp/rregsub.c deleted file mode 100644 index 5fc3e5960..000000000 --- a/sys/src/ape/lib/regexp/rregsub.c +++ /dev/null @@ -1,64 +0,0 @@ -#include -#include -#include "regexp.h" - -/* substitute into one string using the matches from the last regexec() */ -extern void -rregsub(wchar_t *sp, /* source string */ - wchar_t *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ -{ - wchar_t *ssp, *ep; - int i; - - ep = dp+(dlen/sizeof(wchar_t))-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp[i].s.rsp != 0 && mp!=0 && ms>i) - for(ssp = mp[i].s.rsp; - ssp < mp[i].e.rep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; - } - }else if(*sp == '&'){ - if(mp[0].s.rsp != 0 && mp!=0 && ms>0) - if(mp[0].s.rsp != 0) - for(ssp = mp[0].s.rsp; - ssp < mp[0].e.rep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; - } - sp++; - } - *dp = '\0'; -} diff --git a/sys/src/cmd/awk/awk.h b/sys/src/cmd/awk/awk.h index 0bdcd8183..2ede5e1cf 100644 --- a/sys/src/cmd/awk/awk.h +++ b/sys/src/cmd/awk/awk.h @@ -6,20 +6,18 @@ Copyright (c) Lucent Technologies 1997 typedef double Awkfloat; -/* unsigned char is more trouble than it's worth */ - -typedef unsigned char uschar; - -#define xfree(a) { if ((a) != NULL) { free((char *) a); a = NULL; } } +#define xfree(a) { if ((a) != nil) { free((a)); (a) = nil; } } #define DEBUG #ifdef DEBUG /* uses have to be doubly parenthesized */ -# define dprintf(x) if (dbg) printf x +# define dprint(x) if (dbg) print x #else -# define dprintf(x) +# define dprint(x) #endif +#define FOPEN_MAX 40 /* max number of open files */ + extern char errbuf[]; extern int compile_time; /* 1 if compiling, 0 if running */ @@ -28,6 +26,10 @@ extern int safe; /* 0 => unsafe, 1 => safe */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ extern int recsize; /* size of current record, orig RECSIZE */ +extern Biobuf stdin; +extern Biobuf stdout; +extern Biobuf stderr; + extern char **FS; extern char **RS; extern char **ORS; @@ -56,8 +58,8 @@ extern int patlen; /* length of pattern matched. set in b.c */ /* Cell: all information about a variable or constant */ typedef struct Cell { - uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */ - uschar csub; /* CCON, CTEMP, CFLD, etc. */ + uchar ctype; /* OCELL, OBOOL, OJUMP, etc. */ + uchar csub; /* CCON, CTEMP, CFLD, etc. */ char *nval; /* name, for variables only */ char *sval; /* string value */ Awkfloat fval; /* value as number */ @@ -66,7 +68,7 @@ typedef struct Cell { } Cell; typedef struct Array { /* symbol table array */ - int nelem; /* elements in table right now */ + int nelemt; /* elements in table right now */ int size; /* size of tab */ Cell **tab; /* hash table pointers */ } Array; diff --git a/sys/src/cmd/awk/awkgram.y b/sys/src/cmd/awk/awkgram.y index 1216beb21..7ddf6087b 100644 --- a/sys/src/cmd/awk/awkgram.y +++ b/sys/src/cmd/awk/awkgram.y @@ -23,8 +23,9 @@ THIS SOFTWARE. ****************************************************************/ %{ -#include -#include +#include +#include +#include #include "awk.h" #define makedfa(a,b) compre(a) diff --git a/sys/src/cmd/awk/lex.c b/sys/src/cmd/awk/lex.c index d59317e10..90ecf4245 100644 --- a/sys/src/cmd/awk/lex.c +++ b/sys/src/cmd/awk/lex.c @@ -22,10 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -#include -#include -#include +#include +#include #include +#include #include "awk.h" #include "y.tab.h" @@ -90,9 +90,8 @@ Keyword keywords[] ={ /* keep sorted: binary searched */ { "while", WHILE, WHILE }, }; -#define DEBUG #ifdef DEBUG -#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } +#define RET(x) { if(dbg)print("lex %s\n", tokname(x)); return(x); } #else #define RET(x) return(x) #endif @@ -170,7 +169,7 @@ int yylex(void) static char *buf = 0; static int bufsize = 500; - if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) + if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil) FATAL( "out of space in yylex" ); if (sc) { sc = 0; @@ -353,7 +352,7 @@ int string(void) static char *buf = 0; static int bufsz = 500; - if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil) FATAL("out of space for strings"); for (bp = buf; (c = input()) != '"'; ) { if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0)) @@ -401,7 +400,7 @@ int string(void) } *px = 0; unput(c); - sscanf(xbuf, "%x", &n); + n = strtol(xbuf, nil, 16); *bp++ = n; break; } @@ -497,7 +496,7 @@ int regexpr(void) static int bufsz = 500; char *bp; - if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil) FATAL("out of space for rex expr"); bp = buf; for ( ; (c = input()) != '/' && c != 0; ) { @@ -526,7 +525,7 @@ char ebuf[300]; char *ep = ebuf; char yysbuf[100]; /* pushback buffer */ char *yysptr = yysbuf; -FILE *yyin = 0; +Biobuf *yyin; int input(void) /* get next lexical input character */ { @@ -535,14 +534,14 @@ int input(void) /* get next lexical input character */ if (yysptr > yysbuf) c = *--yysptr; - else if (lexprog != NULL) { /* awk '...' */ + else if (lexprog != nil) { /* awk '...' */ if ((c = *lexprog) != 0) lexprog++; } else /* awk -f ... */ c = pgetc(); if (c == '\n') lineno++; - else if (c == EOF) + else if (c == Beof) c = 0; if (ep >= ebuf + sizeof ebuf) ep = ebuf; diff --git a/sys/src/cmd/awk/lib.c b/sys/src/cmd/awk/lib.c index 5a6273690..2d60e3abb 100644 --- a/sys/src/cmd/awk/lib.c +++ b/sys/src/cmd/awk/lib.c @@ -22,17 +22,14 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -#define DEBUG -#include -#include +#include +#include #include -#include -#include -#include +#include #include "awk.h" #include "y.tab.h" -FILE *infile = NULL; +Biobuf *infile; char *file = ""; char *record; int recsize = RECSIZE; @@ -50,17 +47,17 @@ int donerec; /* 1 = record is valid (no flds have changed) */ int lastfld = 0; /* last used field */ int argno = 1; /* current input argument number */ -extern Awkfloat *ARGC; +extern Awkfloat *AARGC; -static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE }; -static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE }; +static Cell dollar0 = { OCELL, CFLD, nil, "", 0.0, REC|STR|DONTFREE }; +static Cell dollar1 = { OCELL, CFLD, nil, "", 0.0, FLD|STR|DONTFREE }; void recinit(unsigned int n) { record = (char *) malloc(n); fields = (char *) malloc(n); fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *)); - if (record == NULL || fields == NULL || fldtab == NULL) + if (record == nil || fields == nil || fldtab == nil) FATAL("out of space for $0 and fields"); fldtab[0] = (Cell *) malloc(sizeof (Cell)); *fldtab[0] = dollar0; @@ -76,10 +73,10 @@ void makefields(int n1, int n2) /* create $n1..$n2 inclusive */ for (i = n1; i <= n2; i++) { fldtab[i] = (Cell *) malloc(sizeof (struct Cell)); - if (fldtab[i] == NULL) + if (fldtab[i] == nil) FATAL("out of space in makefields %d", i); *fldtab[i] = dollar1; - sprintf(temp, "%d", i); + sprint(temp, "%d", i); fldtab[i]->nval = tostring(temp); } } @@ -89,7 +86,7 @@ void initgetrec(void) int i; char *p; - for (i = 1; i < *ARGC; i++) { + for (i = 1; i < *AARGC; i++) { if (!isclvar(p = getargv(i))) { /* find 1st real filename */ setsval(lookup("FILENAME", symtab), getargv(i)); return; @@ -97,7 +94,7 @@ void initgetrec(void) setclvar(p); /* a commandline assignment before filename */ argno++; } - infile = stdin; /* no filenames, so use stdin */ + infile = &stdin; /* no filenames, so use &stdin */ } int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ @@ -111,16 +108,16 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ firsttime = 0; initgetrec(); } - dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", - *RS, *FS, *ARGC, *FILENAME) ); + dprint( ("RS=<%s>, FS=<%s>, AARGC=%g, FILENAME=%s\n", + *RS, *FS, *AARGC, *FILENAME) ); if (isrecord) { donefld = 0; donerec = 1; } buf[0] = 0; - while (argno < *ARGC || infile == stdin) { - dprintf( ("argno=%d, file=|%s|\n", argno, file) ); - if (infile == NULL) { /* have to open a new file */ + while (argno < *AARGC || infile == &stdin) { + dprint( ("argno=%d, file=|%s|\n", argno, file) ); + if (infile == nil) { /* have to open a new file */ file = getargv(argno); if (*file == '\0') { /* it's been zapped */ argno++; @@ -132,10 +129,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ continue; } *FILENAME = file; - dprintf( ("opening file %s\n", file) ); + dprint( ("opening file %s\n", file) ); if (*file == '-' && *(file+1) == '\0') - infile = stdin; - else if ((infile = fopen(file, "r")) == NULL) + infile = &stdin; + else if ((infile = Bopen(file, OREAD)) == nil) FATAL("can't open file %s", file); setfval(fnrloc, 0.0); } @@ -157,10 +154,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ *pbufsize = bufsize; return 1; } - /* EOF arrived on this file; set up next */ - if (infile != stdin) - fclose(infile); - infile = NULL; + /* Beof arrived on this file; set up next */ + if (infile != &stdin) + Bterm(infile); + infile = nil; argno++; } *pbuf = buf; @@ -170,13 +167,13 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ void nextfile(void) { - if (infile != stdin) - fclose(infile); - infile = NULL; + if (infile != &stdin) + Bterm(infile); + infile = nil; argno++; } -int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ +int readrec(char **pbuf, int *pbufsize, Biobuf *inf) /* read one record into buf */ { int sep, c; char *rr, *buf = *pbuf; @@ -187,21 +184,21 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * strcpy(inputFS, *FS); /* for subsequent field splitting */ if ((sep = **RS) == 0) { sep = '\n'; - while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ + while ((c=Bgetc(inf)) == '\n' && c != Beof) /* skip leading \n's */ ; - if (c != EOF) - ungetc(c, inf); + if (c != Beof) + Bungetc(inf); } for (rr = buf; ; ) { - for (; (c=getc(inf)) != sep && c != EOF; ) { + for (; (c=Bgetc(inf)) != sep && c != Beof; ) { if (rr-buf+1 > bufsize) if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) FATAL("input record `%.30s...' too long", buf); *rr++ = c; } - if (**RS == sep || c == EOF) + if (**RS == sep || c == Beof) break; - if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + if ((c = Bgetc(inf)) == '\n' || c == Beof) /* 2 in a row */ break; if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) FATAL("input record `%.30s...' too long", buf); @@ -211,10 +208,10 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf * if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) FATAL("input record `%.30s...' too long", buf); *rr = 0; - dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); + dprint( ("readrec saw <%s>, returns %d\n", buf, c == Beof && rr == buf ? 0 : 1) ); *pbuf = buf; *pbufsize = bufsize; - return c == EOF && rr == buf ? 0 : 1; + return c == Beof && rr == buf ? 0 : 1; } char *getargv(int n) /* get ARGV[n] */ @@ -223,10 +220,10 @@ char *getargv(int n) /* get ARGV[n] */ char *s, temp[50]; extern Array *ARGVtab; - sprintf(temp, "%d", n); + sprint(temp, "%d", n); x = setsymtab(temp, "", 0.0, STR, ARGVtab); s = getsval(x); - dprintf( ("getargv(%d) returns |%s|\n", n, s) ); + dprint( ("getargv(%d) returns |%s|\n", n, s) ); return s; } @@ -245,7 +242,7 @@ void setclvar(char *s) /* set var=value from s */ q->fval = atof(q->sval); q->tval |= NUM; } - dprintf( ("command line set %s to |%s|\n", s, p) ); + dprint( ("command line set %s to |%s|\n", s, p) ); } @@ -265,7 +262,7 @@ void fldbld(void) /* create fields from current record */ n = strlen(r); if (n > fieldssize) { xfree(fields); - if ((fields = (char *) malloc(n+1)) == NULL) + if ((fields = (char *) malloc(n+1)) == nil) FATAL("out of space for fields in fldbld %d", n); fieldssize = n; } @@ -273,7 +270,7 @@ void fldbld(void) /* create fields from current record */ i = 0; /* number of fields accumulated here */ if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); - } else if ((sep = *inputFS) == ' ') { /* default whitespace */ + } else if (*inputFS == ' ') { /* default whitespace */ for (i = 0; ; ) { while (*r == ' ' || *r == '\t' || *r == '\n') r++; @@ -339,7 +336,7 @@ void fldbld(void) /* create fields from current record */ if (dbg) { for (j = 0; j <= lastfld; j++) { p = fldtab[j]; - printf("field %d (%s): |%s|\n", j, p->nval, p->sval); + print("field %d (%s): |%s|\n", j, p->nval, p->sval); } } } @@ -383,7 +380,7 @@ void growfldtab(int n) /* make new fields up to at least $n */ if (n > nf) nf = n; fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *))); - if (fldtab == NULL) + if (fldtab == nil) FATAL("out of space creating %d fields", nf); makefields(nfields+1, nf); nfields = nf; @@ -395,12 +392,12 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */ /* the fields are all stored in this one array with \0's */ char *fr; void *p; - int i, tempstat, n; + int i, n; n = strlen(rec); if (n > fieldssize) { xfree(fields); - if ((fields = (char *) malloc(n+1)) == NULL) + if ((fields = (char *) malloc(n+1)) == nil) FATAL("out of space for fields in refldbld %d", n); fieldssize = n; } @@ -409,7 +406,7 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */ if (*rec == '\0') return 0; p = compre(fs); - dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); + dprint( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); for (i = 1; ; i++) { if (i > nfields) growfldtab(i); @@ -417,15 +414,15 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */ xfree(fldtab[i]->sval); fldtab[i]->tval = FLD | STR | DONTFREE; fldtab[i]->sval = fr; - dprintf( ("refldbld: i=%d\n", i) ); + dprint( ("refldbld: i=%d\n", i) ); if (nematch(p, rec, rec)) { - dprintf( ("match %s (%d chars)\n", patbeg, patlen) ); + dprint( ("match %s (%d chars)\n", patbeg, patlen) ); strncpy(fr, rec, patbeg-rec); fr += patbeg - rec + 1; *(fr-1) = '\0'; rec = patbeg + patlen; } else { - dprintf( ("no match %s\n", rec) ); + dprint( ("no match %s\n", rec) ); strcpy(fr, rec); break; } @@ -457,15 +454,15 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3")) FATAL("built giant record `%.30s...'", record); *r = '\0'; - dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) ); + dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) ); if (freeable(fldtab[0])) xfree(fldtab[0]->sval); fldtab[0]->tval = REC | STR | DONTFREE; fldtab[0]->sval = record; - dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) ); - dprintf( ("recbld = |%s|\n", record) ); + dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) ); + dprint( ("recbld = |%s|\n", record) ); donerec = 1; } @@ -484,24 +481,26 @@ void SYNTAX(char *fmt, ...) if (been_here++ > 2) return; - fprintf(stderr, "%s: ", cmdname); + Bprint(&stderr, "%s: ", cmdname); va_start(varg, fmt); - vfprintf(stderr, fmt, varg); + Bvprint(&stderr, fmt, varg); va_end(varg); - if(compile_time == 1 && cursource() != NULL) - fprintf(stderr, " at %s:%d", cursource(), lineno); + if(compile_time == 1 && cursource() != nil) + Bprint(&stderr, " at %s:%d", cursource(), lineno); else - fprintf(stderr, " at line %d", lineno); - if (curfname != NULL) - fprintf(stderr, " in function %s", curfname); - fprintf(stderr, "\n"); + Bprint(&stderr, " at line %d", lineno); + if (curfname != nil) + Bprint(&stderr, " in function %s", curfname); + Bprint(&stderr, "\n"); errorflag = 2; eprint(); } -void fpecatch(int n) +int handler(void *, char *err) { - FATAL("floating point exception %d", n); + Bflush(&stdout); + fprint(2, "%s\n", err); + return 0; } extern int bracecnt, brackcnt, parencnt; @@ -513,23 +512,23 @@ void bracecheck(void) if (beenhere++) return; - while ((c = input()) != EOF && c != '\0') + while ((c = input()) != Beof && c != '\0') bclass(c); bcheck2(bracecnt, '{', '}'); bcheck2(brackcnt, '[', ']'); bcheck2(parencnt, '(', ')'); } -void bcheck2(int n, int c1, int c2) +void bcheck2(int n, int, int c2) { if (n == 1) - fprintf(stderr, "\tmissing %c\n", c2); + Bprint(&stderr, "\tmissing %c\n", c2); else if (n > 1) - fprintf(stderr, "\t%d missing %c's\n", n, c2); + Bprint(&stderr, "\t%d missing %c's\n", n, c2); else if (n == -1) - fprintf(stderr, "\textra %c\n", c2); + Bprint(&stderr, "\textra %c\n", c2); else if (n < -1) - fprintf(stderr, "\t%d extra %c's\n", -n, c2); + Bprint(&stderr, "\t%d extra %c's\n", -n, c2); } void FATAL(char *fmt, ...) @@ -537,15 +536,15 @@ void FATAL(char *fmt, ...) extern char *cmdname; va_list varg; - fflush(stdout); - fprintf(stderr, "%s: ", cmdname); + Bflush(&stdout); + Bprint(&stderr, "%s: ", cmdname); va_start(varg, fmt); - vfprintf(stderr, fmt, varg); + Bvprint(&stderr, fmt, varg); va_end(varg); error(); if (dbg > 1) /* core dump if serious debugging on */ abort(); - exit(2); + exits("FATAL"); } void WARNING(char *fmt, ...) @@ -553,10 +552,10 @@ void WARNING(char *fmt, ...) extern char *cmdname; va_list varg; - fflush(stdout); - fprintf(stderr, "%s: ", cmdname); + Bflush(&stdout); + Bprint(&stderr, "%s: ", cmdname); va_start(varg, fmt); - vfprintf(stderr, fmt, varg); + Bvprint(&stderr, fmt, varg); va_end(varg); error(); } @@ -566,13 +565,13 @@ void error() extern Node *curnode; int line; - fprintf(stderr, "\n"); + Bprint(&stderr, "\n"); if (compile_time != 2 && NR && *NR > 0) { if (strcmp(*FILENAME, "-") != 0) - fprintf(stderr, " input record %s:%d", *FILENAME, (int) (*FNR)); + Bprint(&stderr, " input record %s:%d", *FILENAME, (int) (*FNR)); else - fprintf(stderr, " input record number %d", (int) (*FNR)); - fprintf(stderr, "\n"); + Bprint(&stderr, " input record number %d", (int) (*FNR)); + Bprint(&stderr, "\n"); } if (compile_time != 2 && curnode) line = curnode->lineno; @@ -580,14 +579,14 @@ void error() line = lineno; else line = -1; - if (compile_time == 1 && cursource() != NULL){ + if (compile_time == 1 && cursource() != nil){ if(line >= 0) - fprintf(stderr, " source %s:%d", cursource(), line); + Bprint(&stderr, " source %s:%d", cursource(), line); else - fprintf(stderr, " source file %s", cursource()); + Bprint(&stderr, " source file %s", cursource()); }else if(line >= 0) - fprintf(stderr, " source line %d", line); - fprintf(stderr, "\n"); + Bprint(&stderr, " source line %d", line); + Bprint(&stderr, "\n"); eprint(); } @@ -607,23 +606,23 @@ void eprint(void) /* try to print context around error */ ; while (*p == '\n') p++; - fprintf(stderr, " context is\n\t"); + Bprint(&stderr, " context is\n\t"); for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) ; for ( ; p < q; p++) if (*p) - putc(*p, stderr); - fprintf(stderr, " >>> "); + Bputc(&stderr, *p); + Bprint(&stderr, " >>> "); for ( ; p < ep; p++) if (*p) - putc(*p, stderr); - fprintf(stderr, " <<< "); + Bputc(&stderr, *p); + Bprint(&stderr, " <<< "); if (*ep) - while ((c = input()) != '\n' && c != '\0' && c != EOF) { - putc(c, stderr); + while ((c = input()) != '\n' && c != '\0' && c != Beof) { + Bputc(&stderr, c); bclass(c); } - putc('\n', stderr); + Bputc(&stderr, '\n'); ep = ebuf; } @@ -642,12 +641,10 @@ void bclass(int c) double errcheck(double x, char *s) { - if (errno == EDOM) { - errno = 0; + if (isNaN(x)) { WARNING("%s argument out of domain", s); x = 1; - } else if (errno == ERANGE) { - errno = 0; + } else if (isInf(x, 1) || isInf(x, -1)) { WARNING("%s result out of range", s); x = 1; } @@ -668,7 +665,6 @@ int isclvar(char *s) /* is s of form var=something ? */ /* strtod is supposed to be a proper test of what's a valid number */ -#include int is_number(char *s) { double r; @@ -699,9 +695,8 @@ int is_number(char *s) return 0; /* can't be a number */ } - errno = 0; r = strtod(s, &ep); - if (ep == s || r == HUGE_VAL || errno == ERANGE) + if (ep == s || isInf(r, 1) || isInf(r, -1)) return 0; while (*ep == ' ' || *ep == '\t' || *ep == '\n') ep++; diff --git a/sys/src/cmd/awk/main.c b/sys/src/cmd/awk/main.c index 532cc1f64..3d1ae4e0d 100644 --- a/sys/src/cmd/awk/main.c +++ b/sys/src/cmd/awk/main.c @@ -24,21 +24,21 @@ THIS SOFTWARE. char *version = "version 19990602"; -#define DEBUG -#include -#include -#include -#include -#include +#include +#include +#include #include "awk.h" #include "y.tab.h" -extern char **environ; extern int nfields; +Biobuf stdin; +Biobuf stdout; +Biobuf stderr; + int dbg = 0; char *cmdname; /* gets argv[0] for error messages */ -extern FILE *yyin; /* lex input file */ +extern Biobuf *yyin; /* lex input file */ char *lexprog; /* points to program argument if it exists */ extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ int compile_time = 2; /* for error printing: */ @@ -50,18 +50,23 @@ int curpfile = 0; /* current filename */ int safe = 0; /* 1 => "safe" mode */ -int main(int argc, char *argv[]) +void main(int argc, char *argv[]) { - char *fs = NULL, *marg; + char *fs = nil, *marg; int temp; + Binit(&stdin, 0, OREAD); + Binit(&stdout, 1, OWRITE); + Binit(&stderr, 2, OWRITE); + cmdname = argv[0]; if (argc == 1) { - fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname); - exit(1); + Bprint(&stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname); + exits("usage"); } - signal(SIGFPE, fpecatch); - yyin = NULL; + + atnotify(handler, 1); + yyin = nil; symtab = makesymtab(NSYMTAB); while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ @@ -94,7 +99,7 @@ int main(int argc, char *argv[]) else if (argc > 1 && argv[1][0] != 0) fs = &argv[1][0]; } - if (fs == NULL || *fs == '\0') + if (fs == nil || *fs == '\0') WARNING("field separator FS is empty"); break; case 'v': /* -v a=1 to be done NOW. one -v for each */ @@ -120,11 +125,11 @@ int main(int argc, char *argv[]) dbg = atoi(&argv[1][2]); if (dbg == 0) dbg = 1; - printf("awk %s\n", version); + print("awk %s\n", version); break; case 'V': /* added for exptools "standard" */ - printf("awk %s\n", version); - exit(0); + print("awk %s\n", version); + exits(0); break; default: WARNING("unknown option %s ignored", argv[1]); @@ -137,10 +142,10 @@ int main(int argc, char *argv[]) if (npfile == 0) { /* no -f; first argument is program */ if (argc <= 1) { if (dbg) - exit(0); + exits(0); FATAL("no program given"); } - dprintf( ("program = |%s|\n", argv[1]) ); + dprint( ("program = |%s|\n", argv[1]) ); lexprog = argv[1]; argc--; argv++; @@ -149,20 +154,20 @@ int main(int argc, char *argv[]) syminit(); compile_time = 1; argv[0] = cmdname; /* put prog name at front of arglist */ - dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); + dprint( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); arginit(argc, argv); - if (!safe) - envinit(environ); yyparse(); if (fs) *FS = qstring(fs, '\0'); - dprintf( ("errorflag=%d\n", errorflag) ); + dprint( ("errorflag=%d\n", errorflag) ); if (errorflag == 0) { compile_time = 0; run(winner); } else bracecheck(); - return(errorflag); + if(errorflag) + exits("error"); + exits(0); } int pgetc(void) /* get 1 character from awk program */ @@ -170,20 +175,20 @@ int pgetc(void) /* get 1 character from awk program */ int c; for (;;) { - if (yyin == NULL) { + if (yyin == nil) { if (curpfile >= npfile) - return EOF; + return Beof; if (strcmp(pfile[curpfile], "-") == 0) - yyin = stdin; - else if ((yyin = fopen(pfile[curpfile], "r")) == NULL) + yyin = &stdin; + else if ((yyin = Bopen(pfile[curpfile], OREAD)) == nil) FATAL("can't open file %s", pfile[curpfile]); lineno = 1; } - if ((c = getc(yyin)) != EOF) + if ((c = Bgetc(yyin)) != Beof) return c; - if (yyin != stdin) - fclose(yyin); - yyin = NULL; + if (yyin != &stdin) + Bterm(yyin); + yyin = nil; curpfile++; } } @@ -193,5 +198,5 @@ char *cursource(void) /* current source file name */ if (npfile > 0) return pfile[curpfile]; else - return NULL; + return nil; } diff --git a/sys/src/cmd/awk/maketab.c b/sys/src/cmd/awk/maketab.c index 13d34e96e..70e214a50 100644 --- a/sys/src/cmd/awk/maketab.c +++ b/sys/src/cmd/awk/maketab.c @@ -28,9 +28,9 @@ THIS SOFTWARE. * it finds the indices in y.tab.h, produced by yacc. */ -#include -#include -#include +#include +#include +#include #include "awk.h" #include "y.tab.h" @@ -39,7 +39,7 @@ struct xx char *name; char *pname; } proc[] = { - { PROGRAM, "program", NULL }, + { PROGRAM, "program", nil }, { BOR, "boolop", " || " }, { AND, "boolop", " && " }, { NOT, "boolop", " !" }, @@ -49,13 +49,13 @@ struct xx { LT, "relop", " < " }, { GE, "relop", " >= " }, { GT, "relop", " > " }, - { ARRAY, "array", NULL }, + { ARRAY, "array", nil }, { INDIRECT, "indirect", "$(" }, { SUBSTR, "substr", "substr" }, { SUB, "sub", "sub" }, { GSUB, "gsub", "gsub" }, { INDEX, "sindex", "sindex" }, - { SPRINTF, "awksprintf", "sprintf " }, + { SPRINTF, "awksprintf", "sprintf" }, { ADD, "arith", " + " }, { MINUS, "arith", " - " }, { MULT, "arith", " * " }, @@ -68,8 +68,8 @@ struct xx { PREDECR, "incrdecr", "--" }, { POSTDECR, "incrdecr", "--" }, { CAT, "cat", " " }, - { PASTAT, "pastat", NULL }, - { PASTAT2, "dopa2", NULL }, + { PASTAT, "pastat", nil }, + { PASTAT2, "dopa2", nil }, { MATCH, "matchop", " ~ " }, { NOTMATCH, "matchop", " !~ " }, { MATCHFCN, "matchop", "matchop" }, @@ -110,59 +110,62 @@ struct xx char *table[SIZE]; char *names[SIZE]; -int main(int argc, char *argv[]) +void main(int, char**) { struct xx *p; - int i, n, tok; - char c; - FILE *fp; - char buf[200], name[200], def[200]; + int i, tok; + Biobuf *fp; + char *buf, *toks[3]; - printf("#include \n"); - printf("#include \"awk.h\"\n"); - printf("#include \"y.tab.h\"\n\n"); + print("#include \n"); + print("#include \n"); + print("#include \n"); + print("#include \"awk.h\"\n"); + print("#include \"y.tab.h\"\n\n"); for (i = SIZE; --i >= 0; ) names[i] = ""; - if ((fp = fopen("y.tab.h", "r")) == NULL) { - fprintf(stderr, "maketab can't open y.tab.h!\n"); - exit(1); + if ((fp = Bopen("y.tab.h", OREAD)) == nil) { + fprint(2, "maketab can't open y.tab.h!\n"); + exits("can't open y.tab.h"); } - printf("static char *printname[%d] = {\n", SIZE); + print("static char *printname[%d] = {\n", SIZE); i = 0; - while (fgets(buf, sizeof buf, fp) != NULL) { - n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); - if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */ + while ((buf = Brdline(fp, '\n')) != nil) { + buf[Blinelen(fp)-1] = '\0'; + tokenize(buf, toks, 3); + if (toks[0] == nil || strcmp("#define", toks[0]) != 0) /* not a valid #define */ continue; + tok = strtol(toks[2], nil, 10); if (tok < FIRSTTOKEN || tok > LASTTOKEN) { - fprintf(stderr, "maketab funny token %d %s\n", tok, buf); - exit(1); + fprint(2, "maketab funny token %d %s\n", tok, buf); + exits("funny token"); } - names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1); - strcpy(names[tok-FIRSTTOKEN], name); - printf("\t(char *) \"%s\",\t/* %d */\n", name, tok); + names[tok-FIRSTTOKEN] = (char *) malloc(strlen(toks[1])+1); + strcpy(names[tok-FIRSTTOKEN], toks[1]); + print("\t(char *) \"%s\",\t/* %d */\n", toks[1], tok); i++; } - printf("};\n\n"); + print("};\n\n"); for (p=proc; p->token!=0; p++) table[p->token-FIRSTTOKEN] = p->name; - printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); + print("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); for (i=0; i LASTTOKEN) {\n"); - printf(" sprintf(buf, \"token %%d\", n);\n"); - printf(" return buf;\n"); - printf(" }\n"); - printf(" return printname[n-FIRSTTOKEN];\n"); - printf("}\n"); - return 0; + print("char *tokname(int n)\n"); /* print a tokname() function */ + print("{\n"); + print(" static char buf[100];\n\n"); + print(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); + print(" sprint(buf, \"token %%d\", n);\n"); + print(" return buf;\n"); + print(" }\n"); + print(" return printname[n-FIRSTTOKEN];\n"); + print("}\n"); + exits(0); } diff --git a/sys/src/cmd/awk/mkfile b/sys/src/cmd/awk/mkfile index 35ee78d7c..9e30f0148 100644 --- a/sys/src/cmd/awk/mkfile +++ b/sys/src/cmd/awk/mkfile @@ -6,6 +6,7 @@ OFILES=re.$O\ main.$O\ parse.$O\ proctab.$O\ + popen.$O\ tran.$O\ lib.$O\ run.$O\ @@ -28,11 +29,6 @@ UPDATE=\ ${TARG:%=/386/bin/%}\ proctab.c +proctab.c: $O.maketab + ./$O.maketab >proctab.c -$cpuobjtype.maketab: y.tab.h maketab.c +$O.maketab: y.tab.h maketab.c objtype=$cputype mk maketab.$cputype diff --git a/sys/src/cmd/awk/parse.c b/sys/src/cmd/awk/parse.c index 2ee6eca62..3e42d2ba7 100644 --- a/sys/src/cmd/awk/parse.c +++ b/sys/src/cmd/awk/parse.c @@ -22,10 +22,9 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -#define DEBUG -#include -#include -#include +#include +#include +#include #include "awk.h" #include "y.tab.h" @@ -34,9 +33,9 @@ Node *nodealloc(int n) Node *x; x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *)); - if (x == NULL) + if (x == nil) FATAL("out of space in nodealloc"); - x->nnext = NULL; + x->nnext = nil; x->lineno = lineno; return(x); } @@ -220,11 +219,11 @@ Node *linkum(Node *a, Node *b) if (errorflag) /* don't link things that are wrong */ return a; - if (a == NULL) + if (a == nil) return(b); - else if (b == NULL) + else if (b == nil) return(a); - for (c = a; c->nnext != NULL; c = c->nnext) + for (c = a; c->nnext != nil; c = c->nnext) ; c->nnext = b; return(a); @@ -245,7 +244,7 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ for (p = vl; p; p = p->nnext) n++; v->fval = n; - dprintf( ("defining func %s (%d args)\n", v->nval, n) ); + dprint( ("defining func %s (%d args)\n", v->nval, n) ); } int isarg(char *s) /* is s in argument list for current function? */ @@ -262,7 +261,7 @@ int isarg(char *s) /* is s in argument list for current function? */ int ptoi(void *p) /* convert pointer to integer */ { - return (int) (long) p; /* swearing that p fits, of course */ + return (int) (vlong) p; /* swearing that p fits, of course */ } Node *itonp(int i) /* and vice versa */ diff --git a/sys/src/cmd/awk/popen.c b/sys/src/cmd/awk/popen.c new file mode 100644 index 000000000..bcb769ebd --- /dev/null +++ b/sys/src/cmd/awk/popen.c @@ -0,0 +1,91 @@ +#include +#include +#include +#include "awk.h" + +#define MAXFORKS 20 +#define NSYSFILE 3 +#define tst(a,b) (mode == OREAD? (b) : (a)) +#define RDR 0 +#define WTR 1 + +struct a_fork { + short done; + short fd; + int pid; + char status[128]; +}; +static struct a_fork the_fork[MAXFORKS]; + +Biobuf* +popen(char *cmd, int mode) +{ + int p[2]; + int myside, hisside, pid; + int i, ind; + + for (ind = 0; ind < MAXFORKS; ind++) + if (the_fork[ind].pid == 0) + break; + if (ind == MAXFORKS) + return nil; + if(pipe(p) < 0) + return nil; + myside = tst(p[WTR], p[RDR]); + hisside = tst(p[RDR], p[WTR]); + switch (pid = fork()) { + case -1: + return nil; + case 0: + /* myside and hisside reverse roles in child */ + close(myside); + dup(hisside, tst(0, 1)); + for (i=NSYSFILE; ipid; + for (f = 0; f < MAXFORKS; f++) { + if (r == the_fork[f].pid) { + the_fork[f].done = 1; + strecpy(the_fork[f].status, the_fork[f].status+512, status->msg); + break; + } + } + free(status); + } while(r != the_fork[ind].pid && r != -1); + if(r == -1) + strcpy(the_fork[ind].status, "No loved ones to wait for"); + } + the_fork[ind].pid = 0; + if(the_fork[ind].status[0] != '\0') + return 1; + return 0; +} diff --git a/sys/src/cmd/awk/proto.h b/sys/src/cmd/awk/proto.h index 4566dd66c..6b14eadd1 100644 --- a/sys/src/cmd/awk/proto.h +++ b/sys/src/cmd/awk/proto.h @@ -44,7 +44,6 @@ extern void quoted(char **, char **, char *); extern int match(void *, char *, char *); extern int pmatch(void *, char *, char *); extern int nematch(void *, char *, char *); -extern int countposn(char *, int); extern void overflow(void); extern int pgetc(void); @@ -100,7 +99,7 @@ extern void makefields(int, int); extern void growfldtab(int n); extern int getrec(char **, int *, int); extern void nextfile(void); -extern int readrec(char **buf, int *bufsize, FILE *inf); +extern int readrec(char **buf, int *bufsize, Biobuf *inf); extern char *getargv(int); extern void setclvar(char *); extern void fldbld(void); @@ -110,7 +109,7 @@ extern int refldbld(char *, char *); extern void recbld(void); extern Cell *fieldadr(int); extern void yyerror(char *); -extern void fpecatch(int); +extern int handler(void*, char*); extern void bracecheck(void); extern void bcheck2(int, int, int); extern void SYNTAX(char *, ...); @@ -165,13 +164,13 @@ extern Cell *instat(Node **, int); extern Cell *bltin(Node **, int); extern Cell *printstat(Node **, int); extern Cell *nullproc(Node **, int); -extern FILE *redirect(int, Node *); -extern FILE *openfile(int, char *); -extern char *filename(FILE *); +extern Biobuf *redirect(int, Node *); +extern Biobuf *openfile(int, char *); +extern char *filename(Biobuf *); extern Cell *closefile(Node **, int); extern void closeall(void); extern Cell *sub(Node **, int); extern Cell *gsub(Node **, int); -extern FILE *popen(const char *, const char *); -extern int pclose(FILE *); +extern Biobuf *popen(char *, int); +extern int pclose(Biobuf *); diff --git a/sys/src/cmd/awk/re.c b/sys/src/cmd/awk/re.c index 32fe2af47..f93a8dd45 100644 --- a/sys/src/cmd/awk/re.c +++ b/sys/src/cmd/awk/re.c @@ -22,18 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ - -#define DEBUG -#include +#include +#include #include -#include -#include -#include -#include -#include +#include +#include #include "awk.h" #include "y.tab.h" -#include "regexp.h" /* This file provides the interface between the main body of * awk and the pattern matching package. It preprocesses @@ -198,11 +193,11 @@ pmatch(void *p, char *s, char *start) { Resub m; - m.s.sp = start; - m.e.ep = 0; + m.sp = start; + m.ep = 0; if (regexec((Reprog *) p, (char *) s, &m, 1)) { - patbeg = m.s.sp; - patlen = m.e.ep-m.s.sp; + patbeg = m.sp; + patlen = m.ep-m.sp; return 1; } patlen = -1; @@ -250,7 +245,7 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */ { char *p = *s; char *t = *to; - wchar_t c; + Rune c; switch(c = *p++) { case 't': @@ -273,8 +268,8 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */ *t++ = '\\'; if (c == 'x') { /* hexadecimal goo follows */ c = hexstr(&p); - if (t < end-MB_CUR_MAX) - t += wctomb(t, c); + if (t < end-UTFmax) + t += runelen(c); else overflow(); *to = t; *s = p; @@ -294,21 +289,6 @@ quoted(char **s, char **to, char *end) /* handle escaped sequence */ *s = p; *to = t; } - /* count rune positions */ -int -countposn(char *s, int n) -{ - int i, j; - char *end; - - for (i = 0, end = s+n; *s && s < end; i++){ - j = mblen(s, n); - if(j <= 0) - j = 1; - s += j; - } - return(i); -} /* pattern package error handler */ diff --git a/sys/src/cmd/awk/run.c b/sys/src/cmd/awk/run.c index a8c03f1b5..c9e90bc45 100644 --- a/sys/src/cmd/awk/run.c +++ b/sys/src/cmd/awk/run.c @@ -22,43 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -#define DEBUG -#include +#include +#include #include -#include -#include -#include -#include -#include -#include +#include #include "awk.h" #include "y.tab.h" -#define tempfree(x) if (istemp(x)) tfree(x); else - -/* -#undef tempfree - -void tempfree(Cell *p) { - if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { - WARNING("bad csub %d in Cell %d %s", - p->csub, p->ctype, p->sval); - } - if (istemp(p)) - tfree(p); -} -*/ - -#ifdef _NFILE -#ifndef FOPEN_MAX -#define FOPEN_MAX _NFILE -#endif -#endif - -#ifndef FOPEN_MAX -#define FOPEN_MAX 40 /* max number of open files */ -#endif - #ifndef RAND_MAX #define RAND_MAX 32767 /* all that ansi guarantees */ #endif @@ -66,7 +36,7 @@ void tempfree(Cell *p) { jmp_buf env; extern int pairstack[]; -Node *winner = NULL; /* root of parse tree */ +Node *winner = nil; /* root of parse tree */ Cell *tmps; /* free temporary cells for execution */ static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM }; @@ -87,7 +57,42 @@ static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM }; Cell *jret = &retcell; static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE }; -Node *curnode = NULL; /* the node being executed, for debugging */ +Node *curnode = nil; /* the node being executed, for debugging */ + +int +system(const char *s) +{ + char status[512], *statfld[5]; + int w, pid; + char cmd[30], *oty; + + oty = getenv("cputype"); + if(!oty) + return -1; + if(!s) + return 1; /* a command interpreter is available */ + pid = fork(); + snprint(cmd, sizeof cmd, "/%s/bin/ape/sh", oty); + if(pid == 0) { + execl(cmd, "sh", "-c", s, nil); + exits("exec"); + } + if(pid < 0){ + return -1; + } + for(;;) { + w = await(status, sizeof(status) - 1); + if(w == -1) + return -1; + tokenize(status, statfld, nelem(statfld)); + if(strtol(statfld[0], nil, 0) == pid) + break; + } + + if(*statfld[4] != '\0') + return 1; + return 0; +} /* buffer memory management */ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, @@ -110,7 +115,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); - if (tbuf == NULL) { + if (tbuf == nil) { if (whatrtn) FATAL("out of memory in %s", whatrtn); return 0; @@ -139,7 +144,7 @@ Cell *execute(Node *u) /* execute a node of the parse tree */ Cell *x; Node *a; - if (u == NULL) + if (u == nil) return(True); for (a = u; ; a = a->nnext) { curnode = a; @@ -164,14 +169,15 @@ Cell *execute(Node *u) /* execute a node of the parse tree */ return(x); if (isjump(x)) return(x); - if (a->nnext == NULL) + if (a->nnext == nil) return(x); - tempfree(x); + if(istemp(x)) + tfree(x); } } -Cell *program(Node **a, int n) /* execute an awk program */ +Cell *program(Node **a, int) /* execute an awk program */ { /* a[0] = BEGIN, a[1] = body, a[2] = END */ Cell *x; @@ -183,14 +189,16 @@ Cell *program(Node **a, int n) /* execute an awk program */ return(True); if (isjump(x)) FATAL("illegal break, continue, next or nextfile from BEGIN"); - tempfree(x); + if (istemp(x)) + tfree(x); } if (a[1] || a[2]) while (getrec(&record, &recsize, 1) > 0) { x = execute(a[1]); if (isexit(x)) break; - tempfree(x); + if (istemp(x)) + tfree(x); } ex: if (setjmp(env) != 0) /* handles exit within END */ @@ -199,7 +207,8 @@ Cell *program(Node **a, int n) /* execute an awk program */ x = execute(a[2]); if (isbreak(x) || isnext(x) || iscont(x)) FATAL("illegal break, continue, next or nextfile from END"); - tempfree(x); + if (istemp(x)) + tfree(x); } ex1: return(True); @@ -214,11 +223,11 @@ struct Frame { /* stack frame for awk function calls */ #define NARGS 50 /* max args in a call */ -struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ +struct Frame *frame = nil; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ -struct Frame *fp = NULL; /* frame pointer. bottom level unused */ +struct Frame *fp = nil; /* frame pointer. bottom level unused */ -Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ +Cell *call(Node **a, int) /* function call. very kludgy and fragile */ { static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE }; int i, ncall, ndef; @@ -231,25 +240,25 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ s = fcn->nval; if (!isfcn(fcn)) FATAL("calling undefined function %s", s); - if (frame == NULL) { + if (frame == nil) { fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame)); - if (frame == NULL) + if (frame == nil) FATAL("out of space for stack frames calling %s", s); } - for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ + for (ncall = 0, x = a[1]; x != nil; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ - dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); + dprint( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); - for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ - dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); + for (i = 0, x = a[1]; x != nil; i++, x = x->nnext) { /* get call args */ + dprint( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); y = execute(x); oargs[i] = y; - dprintf( ("args[%d]: %s %f <%s>, t=%o\n", + dprint( ("args[%d]: %s %f <%s>, t=%o\n", i, y->nval, y->fval, isarr(y) ? "(array)" : y->sval, y->tval) ); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); @@ -257,7 +266,8 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ args[i] = y; /* arrays by ref */ else args[i] = copycell(y); - tempfree(y); + if (istemp(y)) + tfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ args[i] = gettemp(); @@ -268,7 +278,7 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ int dfp = fp - frame; /* old index */ frame = (struct Frame *) realloc((char *) frame, (nframe += 100) * sizeof(struct Frame)); - if (frame == NULL) + if (frame == nil) FATAL("out of space for stack frames in %s", s); fp = frame + dfp; } @@ -277,9 +287,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ fp->nargs = ndef; /* number defined with (excess are locals) */ fp->retval = gettemp(); - dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprint( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); y = execute((Node *)(fcn->sval)); /* execute body */ - dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + dprint( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); for (i = 0; i < ndef; i++) { Cell *t = fp->args[i]; @@ -288,25 +298,30 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (i >= ncall) { freesymtab(t); t->csub = CTEMP; - tempfree(t); + if (istemp(t)) + tfree(t); } else { oargs[i]->tval = t->tval; oargs[i]->tval &= ~(STR|NUM|DONTFREE); oargs[i]->sval = t->sval; - tempfree(t); + if (istemp(t)) + tfree(t); } } } else if (t != y) { /* kludge to prevent freeing twice */ t->csub = CTEMP; - tempfree(t); + if (istemp(t)) + tfree(t); } } - tempfree(fcn); + if (istemp(fcn)) + tfree(fcn); if (isexit(y) || isnext(y) || isnextfile(y)) return y; - tempfree(y); /* this can free twice! */ + if (istemp(y)) + tfree(y); /* this can free twice! */ z = fp->retval; /* return value */ - dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); + dprint( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); fp--; return(z); } @@ -318,7 +333,7 @@ Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ y = gettemp(); y->csub = CCOPY; /* prevents freeing until call is over */ y->nval = x->nval; /* BUG? */ - y->sval = x->sval ? tostring(x->sval) : NULL; + y->sval = x->sval ? tostring(x->sval) : nil; y->fval = x->fval; y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ /* is DONTFREE right? */ @@ -329,7 +344,7 @@ Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ - dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); + dprint( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); if (n+1 > fp->nargs) FATAL("argument #%d of function %s was not supplied", n+1, fp->fcncell->nval); @@ -342,14 +357,15 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ switch (n) { case EXIT: - if (a[0] != NULL) { + if (a[0] != nil) { y = execute(a[0]); errorflag = (int) getfval(y); - tempfree(y); + if (istemp(y)) + tfree(y); } longjmp(env, 1); case RETURN: - if (a[0] != NULL) { + if (a[0] != nil) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(fp->retval, getsval(y)); @@ -362,7 +378,8 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ setfval(fp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); - tempfree(y); + if (istemp(y)) + tfree(y); } return(jret); case NEXT: @@ -384,33 +401,35 @@ Cell *getline(Node **a, int n) /* get next line from specific input */ { /* a[0] is variable, a[1] is operator, a[2] is filename */ Cell *r, *x; extern Cell **fldtab; - FILE *fp; + Biobuf *fp; char *buf; int bufsize = recsize; int mode; - if ((buf = (char *) malloc(bufsize)) == NULL) + if ((buf = (char *) malloc(bufsize)) == nil) FATAL("out of memory in getline"); - fflush(stdout); /* in case someone is waiting for a prompt */ + Bflush(&stdout); /* in case someone is waiting for a prompt */ r = gettemp(); - if (a[1] != NULL) { /* getline < file */ + if (a[1] != nil) { /* getline < file */ x = execute(a[2]); /* filename */ mode = ptoi(a[1]); if (mode == '|') /* input pipe */ mode = LE; /* arbitrary flag */ fp = openfile(mode, getsval(x)); - tempfree(x); - if (fp == NULL) + if (istemp(x)) + tfree(x); + if (fp == nil) n = -1; else n = readrec(&buf, &bufsize, fp); if (n <= 0) { ; - } else if (a[0] != NULL) { /* getline var sval)) { @@ -419,13 +438,14 @@ Cell *getline(Node **a, int n) /* get next line from specific input */ } } } else { /* bare getline; use current input */ - if (a[0] == NULL) /* getline */ + if (a[0] == nil) /* getline */ n = getrec(&record, &recsize, 1); else { /* getline var */ n = getrec(&buf, &bufsize, 0); x = execute(a[0]); setsval(x, buf); - tempfree(x); + if (istemp(x)) + tfree(x); } } setfval(r, (Awkfloat) n); @@ -433,14 +453,14 @@ Cell *getline(Node **a, int n) /* get next line from specific input */ return r; } -Cell *getnf(Node **a, int n) /* get NF */ +Cell *getnf(Node **a, int) /* get NF */ { if (donefld == 0) fldbld(); return (Cell *) a[0]; } -Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ +Cell *array(Node **a, int) /* a[0] is symtab, a[1] is list of subscripts */ { Cell *x, *y, *z; char *s; @@ -449,7 +469,7 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ int bufsz = recsize; int nsub = strlen(*SUBSEP); - if ((buf = (char *) malloc(bufsz)) == NULL) + if ((buf = (char *) malloc(bufsz)) == nil) FATAL("out of memory in array"); x = execute(a[0]); /* Cell* for symbol table */ @@ -462,10 +482,11 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); - tempfree(y); + if (istemp(y)) + tfree(y); } if (!isarr(x)) { - dprintf( ("making %s into an array\n", x->nval) ); + dprint( ("making %s into an array\n", x->nval) ); if (freeable(x)) xfree(x->sval); x->tval &= ~(STR|NUM|DONTFREE); @@ -475,12 +496,13 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); z->ctype = OCELL; z->csub = CVAR; - tempfree(x); + if (istemp(x)) + tfree(x); free(buf); return(z); } -Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ +Cell *awkdelete(Node **a, int) /* a[0] is symtab, a[1] is list of subscripts */ { Cell *x, *y; Node *np; @@ -498,7 +520,7 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * } else { int bufsz = recsize; char *buf; - if ((buf = (char *) malloc(bufsz)) == NULL) + if ((buf = (char *) malloc(bufsz)) == nil) FATAL("out of memory in adelete"); buf[0] = 0; for (np = a[1]; np; np = np->nnext) { @@ -509,16 +531,18 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); - tempfree(y); + if (istemp(y)) + tfree(y); } freeelem(x, buf); free(buf); } - tempfree(x); + if (istemp(x)) + tfree(x); return True; } -Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ +Cell *intest(Node **a, int) /* a[0] is index (list), a[1] is symtab */ { Cell *x, *ap, *k; Node *p; @@ -529,14 +553,14 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ ap = execute(a[1]); /* array name */ if (!isarr(ap)) { - dprintf( ("making %s into an array\n", ap->nval) ); + dprint( ("making %s into an array\n", ap->nval) ); if (freeable(ap)) xfree(ap->sval); ap->tval &= ~(STR|NUM|DONTFREE); ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); } - if ((buf = (char *) malloc(bufsz)) == NULL) { + if ((buf = (char *) malloc(bufsz)) == nil) { FATAL("out of memory in intest"); } buf[0] = 0; @@ -546,14 +570,16 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 0)) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); - tempfree(x); + if (istemp(x)) + tfree(x); if (p->nnext) strcat(buf, *SUBSEP); } k = lookup(buf, (Array *) ap->sval); - tempfree(ap); + if (istemp(ap)) + tfree(ap); free(buf); - if (k == NULL) + if (k == nil) return(False); else return(True); @@ -575,19 +601,21 @@ Cell *matchop(Node **a, int n) /* ~ and match() */ y = execute(a[2]); /* a[2] = regular expr */ t = getsval(y); p = compre(t); - tempfree(y); + if (istemp(y)) + tfree(y); } if (n == MATCHFCN) i = pmatch(p, s, s); else i = match(p, s, s); - tempfree(x); + if (istemp(x)) + tfree(x); if (n == MATCHFCN) { - int start = countposn(s, patbeg-s)+1; + int start = utfnlen(s, patbeg-s)+1; if (patlen < 0) start = 0; setfval(rstartloc, (Awkfloat) start); - setfval(rlengthloc, (Awkfloat) countposn(patbeg, patlen)); + setfval(rlengthloc, (Awkfloat) utfnlen(patbeg, patlen)); x = gettemp(); x->tval = NUM; x->fval = start; @@ -606,20 +634,23 @@ Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ x = execute(a[0]); i = istrue(x); - tempfree(x); + if (istemp(x)) + tfree(x); switch (n) { case BOR: if (i) return(True); y = execute(a[1]); i = istrue(y); - tempfree(y); + if (istemp(y)) + tfree(y); if (i) return(True); else return(False); case AND: if ( !i ) return(False); y = execute(a[1]); i = istrue(y); - tempfree(y); + if (istemp(y)) + tfree(y); if (i) return(True); else return(False); case NOT: @@ -645,8 +676,10 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ } else { i = strcmp(getsval(x), getsval(y)); } - tempfree(x); - tempfree(y); + if (istemp(x)) + tfree(x); + if (istemp(y)) + tfree(y); switch (n) { case LT: if (i<0) return(True); else return(False); @@ -669,7 +702,7 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ void tfree(Cell *a) /* free a tempcell */ { if (freeable(a)) { - dprintf( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) ); + dprint( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) ); xfree(a->sval); } if (a == tmps) @@ -696,7 +729,7 @@ Cell *gettemp(void) /* get a tempcell */ return(x); } -Cell *indirect(Node **a, int n) /* $( a[0] ) */ +Cell *indirect(Node **a, int) /* $( a[0] ) */ { Cell *x; int m; @@ -707,16 +740,18 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */ if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ FATAL("illegal field $(%s), name \"%s\"", s, x->nval); /* BUG: can x->nval ever be null??? */ - tempfree(x); + if (istemp(x)) + tfree(x); x = fieldadr(m); x->ctype = OCELL; /* BUG? why are these needed? */ x->csub = CFLD; return(x); } -Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ +Cell *substr(Node **a, int) /* substr(a[0], a[1], a[2]) */ { int k, m, n; + Rune r; char *s, *p; int temp; Cell *x, *y, *z = 0; @@ -726,12 +761,16 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ if (a[2] != 0) z = execute(a[2]); s = getsval(x); - k = countposn(s, strlen(s)) + 1; + k = utfnlen(s, strlen(s)) + 1; if (k <= 1) { - tempfree(x); - tempfree(y); - if (a[2] != 0) - tempfree(z); + if (istemp(x)) + tfree(x); + if (istemp(y)) + tfree(y); + if (a[2] != 0) { + if (istemp(z)) + tfree(z); + } x = gettemp(); setsval(x, ""); return(x); @@ -741,31 +780,34 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ m = 1; else if (m > k) m = k; - tempfree(y); + if (istemp(y)) + tfree(y); if (a[2] != 0) { n = (int) getfval(z); - tempfree(z); + if (istemp(z)) + tfree(z); } else n = k - 1; if (n < 0) n = 0; else if (n > k - m) n = k - m; - dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); + dprint( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); y = gettemp(); while (*s && --m) - s += mblen(s, k); - for (p = s; *p && n--; p += mblen(p, k)) + s += chartorune(&r, s); + for (p = s; *p && n--; p += chartorune(&r, p)) ; temp = *p; /* with thanks to John Linderman */ *p = '\0'; setsval(y, s); *p = temp; - tempfree(x); + if (istemp(x)) + tfree(x); return(y); } -Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ +Cell *sindex(Node **a, int) /* index(a[0], a[1]) */ { Cell *x, *y, *z; char *s1, *s2, *p1, *p2, *q; @@ -781,12 +823,14 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) ; if (*p2 == '\0') { - v = (Awkfloat) countposn(s1, p1-s1) + 1; /* origin 1 */ + v = (Awkfloat) utfnlen(s1, p1-s1) + 1; /* origin 1 */ break; } } - tempfree(x); - tempfree(y); + if (istemp(x)) + tfree(x); + if (istemp(y)) + tfree(y); setfval(z, v); return(z); } @@ -798,7 +842,7 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi char *fmt; char *p, *t, *os; Cell *x; - int flag = 0, n; + int flag, n; int fmtwd; /* format width */ int fmtsz = recsize; char *buf = *pbuf; @@ -806,7 +850,7 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi os = s; p = buf; - if ((fmt = (char *) malloc(fmtsz)) == NULL) + if ((fmt = (char *) malloc(fmtsz)) == nil) FATAL("out of memory in format()"); while (*s) { adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format"); @@ -832,12 +876,13 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi if (*s == '*') { x = execute(a); a = a->nnext; - sprintf(t-1, "%d", fmtwd=(int) getfval(x)); + sprint(t-1, "%d", fmtwd=(int) getfval(x)); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); t = fmt + strlen(fmt); - tempfree(x); + if (istemp(x)) + tfree(x); } } *t = '\0'; @@ -856,7 +901,13 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi *t = 'd'; *++t = '\0'; break; - case 'o': case 'x': case 'X': case 'u': + case 'u': + flag = *(s-1) == 'l' ? 2 : 3; + *t++ = 'u'; + *t++ = 'd'; + *t = '\0'; + break; + case 'o': case 'x': case 'X': flag = *(s-1) == 'l' ? 2 : 3; break; case 's': @@ -870,7 +921,7 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi flag = 0; break; } - if (a == NULL) + if (a == nil) FATAL("not enough args in printf(%s)", os); x = execute(a); a = a->nnext; @@ -879,18 +930,18 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi n = fmtwd; adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format"); switch (flag) { - case 0: sprintf(p, "%s", fmt); /* unknown, so dump it too */ + case 0: sprint(p, "%s", fmt); /* unknown, so dump it too */ t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format"); p += strlen(p); - sprintf(p, "%s", t); + sprint(p, "%s", t); break; - case 1: sprintf(p, fmt, getfval(x)); break; - case 2: sprintf(p, fmt, (long) getfval(x)); break; - case 3: sprintf(p, fmt, (int) getfval(x)); break; + case 1: sprint(p, fmt, getfval(x)); break; + case 2: sprint(p, fmt, (long) getfval(x)); break; + case 3: sprint(p, fmt, (int) getfval(x)); break; case 4: t = getsval(x); n = strlen(t); @@ -898,21 +949,26 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi n = fmtwd; if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, 0)) FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); - sprintf(p, fmt, t); + sprint(p, fmt, t); break; case 5: if (isnum(x)) { - if (getfval(x)) - sprintf(p, fmt, (int) getfval(x)); - else{ + if (getfval(x)) { + *p++ = (uchar)getfval(x); + *p = '\0'; + } else { *p++ = '\0'; *p = '\0'; } - } else - sprintf(p, fmt, getsval(x)[0]); + } else { + if((*p = getsval(x)[0]) != '\0') + p++; + *p = '\0'; + } break; } - tempfree(x); + if (istemp(x)) + tfree(x); p += strlen(p); s++; } @@ -925,55 +981,56 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a) /* printf-like conversi return p - buf; } -Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ +Cell *awksprintf(Node **a, int) /* sprint(a[0]) */ { Cell *x; Node *y; char *buf; int bufsz=3*recsize; - if ((buf = (char *) malloc(bufsz)) == NULL) - FATAL("out of memory in awksprintf"); + if ((buf = (char *) malloc(bufsz)) == nil) + FATAL("out of memory in awksprint"); y = a[0]->nnext; x = execute(a[0]); if (format(&buf, &bufsz, getsval(x), y) == -1) - FATAL("sprintf string %.30s... too long. can't happen.", buf); - tempfree(x); + FATAL("sprint string %.30s... too long. can't happen.", buf); + if (istemp(x)) + tfree(x); x = gettemp(); x->sval = buf; x->tval = STR; return(x); } -Cell *awkprintf(Node **a, int n) /* printf */ +Cell *awkprintf(Node **a, int) /* printf */ { /* a[0] is list of args, starting with format string */ /* a[1] is redirection operator, a[2] is redirection file */ - FILE *fp; + Biobuf *fp; Cell *x; Node *y; char *buf; int len; int bufsz=3*recsize; - if ((buf = (char *) malloc(bufsz)) == NULL) + if ((buf = (char *) malloc(bufsz)) == nil) FATAL("out of memory in awkprintf"); y = a[0]->nnext; x = execute(a[0]); if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) FATAL("printf string %.30s... too long. can't happen.", buf); - tempfree(x); - if (a[1] == NULL) { + if (istemp(x)) + tfree(x); + if (a[1] == nil) { /* fputs(buf, stdout); */ - fwrite(buf, len, 1, stdout); - if (ferror(stdout)) + if (Bwrite(&stdout, buf, len) < 0) FATAL("write error on stdout"); + Bflush(&stdout); } else { fp = redirect(ptoi(a[1]), a[2]); /* fputs(buf, fp); */ - fwrite(buf, len, 1, fp); - fflush(fp); - if (ferror(fp)) + if(Bwrite(fp, buf, len) < 0) FATAL("write error on %s", filename(fp)); + Bflush(fp); } free(buf); return(True); @@ -987,11 +1044,13 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ x = execute(a[0]); i = getfval(x); - tempfree(x); + if (istemp(x)) + tfree(x); if (n != UMINUS) { y = execute(a[1]); j = getfval(y); - tempfree(y); + if (istemp(y)) + tfree(y); } z = gettemp(); switch (n) { @@ -1060,7 +1119,8 @@ Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ z = gettemp(); setfval(z, xf); setfval(x, xf + k); - tempfree(x); + if (istemp(x)) + tfree(x); return(z); } @@ -1074,8 +1134,8 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ x = execute(a[0]); if (n == ASSIGN) { /* ordinary assignment */ if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */ - ; /* leave alone unless it's a field */ - else if ((y->tval & (STR|NUM)) == (STR|NUM)) { + goto Free; /* leave alone unless it's a field */ + if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(x, getsval(y)); x->fval = getfval(y); x->tval |= NUM; @@ -1086,7 +1146,9 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ setfval(x, getfval(y)); else funnyvar(y, "read value of"); - tempfree(y); +Free: + if (istemp(y)) + tfree(y); return(x); } xf = getfval(x); @@ -1122,12 +1184,13 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ FATAL("illegal assignment operator %d", n); break; } - tempfree(y); + if (istemp(y)) + tfree(y); setfval(x, xf); return(x); } -Cell *cat(Node **a, int q) /* a[0] cat a[1] */ +Cell *cat(Node **a, int) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; @@ -1140,20 +1203,22 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */ n1 = strlen(x->sval); n2 = strlen(y->sval); s = (char *) malloc(n1 + n2 + 1); - if (s == NULL) + if (s == nil) FATAL("out of space concatenating %.15s... and %.15s...", x->sval, y->sval); strcpy(s, x->sval); strcpy(s+n1, y->sval); - tempfree(y); + if (istemp(y)) + tfree(y); z = gettemp(); z->sval = s; z->tval = STR; - tempfree(x); + if (istemp(x)) + tfree(x); return(z); } -Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ +Cell *pastat(Node **a, int) /* a[0] { a[1] } */ { Cell *x; @@ -1162,14 +1227,15 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ else { x = execute(a[0]); if (istrue(x)) { - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[1]); } } return x; } -Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ +Cell *dopa2(Node **a, int) /* a[0], a[1] { a[2] } */ { Cell *x; int pair; @@ -1179,25 +1245,27 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ x = execute(a[0]); if (istrue(x)) pairstack[pair] = 1; - tempfree(x); + if (istemp(x)) + tfree(x); } if (pairstack[pair] == 1) { x = execute(a[1]); if (istrue(x)) pairstack[pair] = 0; - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[2]); return(x); } return(False); } -Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ +Cell *split(Node **a, int) /* split(a[0], a[1], a[2]); a[3] is type */ { Cell *x = 0, *y, *ap; char *s, *t, *fs = 0; char temp, num[50]; - int n, nb, sep, tempstat, arg3type; + int n, nb, sep, arg3type; y = execute(a[0]); /* source string */ s = getsval(y); @@ -1217,7 +1285,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ y->tval |= DONTFREE; /* split(a[x], a); */ freesymtab(ap); y->tval = n; - dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) ); + dprint( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) ); ap->tval &= ~STR; ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); @@ -1234,7 +1302,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ if (nematch(p,s,t)) { do { n++; - sprintf(num, "%d", n); + sprint(num, "%d", n); temp = *patbeg; *patbeg = '\0'; if (is_number(t)) @@ -1245,20 +1313,21 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ t = patbeg + patlen; if (t[-1] == 0 || *t == 0) { n++; - sprintf(num, "%d", n); + sprint(num, "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); goto spdone; } } while (nematch(p,s,t)); } n++; - sprintf(num, "%d", n); + sprint(num, "%d", n); if (is_number(t)) setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); spdone: - p = NULL; + p = nil; + USED(p); } else if (sep == ' ') { for (n = 0; ; ) { while (*s == ' ' || *s == '\t' || *s == '\n') @@ -1272,7 +1341,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); temp = *s; *s = '\0'; - sprintf(num, "%d", n); + sprint(num, "%d", n); if (is_number(t)) setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); else @@ -1287,7 +1356,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ char buf[UTFmax+1]; n++; - snprintf(num, sizeof num, "%d", n); + snprint(num, sizeof num, "%d", n); nb = chartorune(&r, s); memmove(buf, s, nb); buf[nb] = '\0'; @@ -1304,7 +1373,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ s++; temp = *s; *s = '\0'; - sprintf(num, "%d", n); + sprint(num, "%d", n); if (is_number(t)) setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); else @@ -1314,47 +1383,54 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ break; } } - tempfree(ap); - tempfree(y); + if (istemp(ap)) + tfree(ap); + if (istemp(y)) + tfree(y); if (a[2] != 0 && arg3type == STRING) - tempfree(x); + if (istemp(x)) + tfree(x); x = gettemp(); x->tval = NUM; x->fval = n; return(x); } -Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ +Cell *condexpr(Node **a, int) /* a[0] ? a[1] : a[2] */ { Cell *x; x = execute(a[0]); if (istrue(x)) { - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[1]); } else { - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[2]); } return(x); } -Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ +Cell *ifstat(Node **a, int) /* if (a[0]) a[1]; else a[2] */ { Cell *x; x = execute(a[0]); if (istrue(x)) { - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[1]); } else if (a[2] != 0) { - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[2]); } return(x); } -Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ +Cell *whilestat(Node **a, int) /* while (a[0]) a[1] */ { Cell *x; @@ -1362,7 +1438,8 @@ Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ x = execute(a[0]); if (!istrue(x)) return(x); - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[1]); if (isbreak(x)) { x = True; @@ -1370,11 +1447,12 @@ Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ } if (isnext(x) || isexit(x) || isret(x)) return(x); - tempfree(x); + if (istemp(x)) + tfree(x); } } -Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ +Cell *dostat(Node **a, int) /* do a[0]; while(a[1]) */ { Cell *x; @@ -1384,38 +1462,45 @@ Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ return True; if (isnext(x) || isnextfile(x) || isexit(x) || isret(x)) return(x); - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[1]); if (!istrue(x)) return(x); - tempfree(x); + if (istemp(x)) + tfree(x); } } -Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ +Cell *forstat(Node **a, int) /* for (a[0]; a[1]; a[2]) a[3] */ { Cell *x; x = execute(a[0]); - tempfree(x); + if (istemp(x)) + tfree(x); for (;;) { if (a[1]!=0) { x = execute(a[1]); - if (!istrue(x)) return(x); - else tempfree(x); + if (!istrue(x)) + return(x); + else if (istemp(x)) + tfree(x); } x = execute(a[3]); if (isbreak(x)) /* turn off break */ return True; if (isnext(x) || isexit(x) || isret(x)) return(x); - tempfree(x); + if (istemp(x)) + tfree(x); x = execute(a[2]); - tempfree(x); + if (istemp(x)) + tfree(x); } } -Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ +Cell *instat(Node **a, int) /* for (a[0] in a[1]) a[2] */ { Cell *x, *vp, *arrayp, *cp, *ncp; Array *tp; @@ -1427,36 +1512,40 @@ Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ return True; } tp = (Array *) arrayp->sval; - tempfree(arrayp); + if (istemp(arrayp)) + tfree(arrayp); for (i = 0; i < tp->size; i++) { /* this routine knows too much */ - for (cp = tp->tab[i]; cp != NULL; cp = ncp) { + for (cp = tp->tab[i]; cp != nil; cp = ncp) { setsval(vp, cp->nval); ncp = cp->cnext; x = execute(a[2]); if (isbreak(x)) { - tempfree(vp); + if (istemp(vp)) + tfree(vp); return True; } if (isnext(x) || isexit(x) || isret(x)) { - tempfree(vp); + if (istemp(vp)) + tfree(vp); return(x); } - tempfree(x); + if (istemp(x)) + tfree(x); } } return True; } -Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ +Cell *bltin(Node **a, int) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t; - wchar_t wc; + Rune wc; char *p, *buf; char mbc[50]; Node *nextarg; - FILE *fp; + Biobuf *fp; void flush_all(void); t = ptoi(a[0]); @@ -1465,10 +1554,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis switch (t) { case FLENGTH: if (isarr(x)) - u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ + u = ((Array *) x->sval)->nelemt; /* GROT. should be function*/ else { p = getsval(x); - u = (Awkfloat) countposn(p, strlen(p)); + u = (Awkfloat) utfnlen(p, strlen(p)); } break; case FLOG: @@ -1490,12 +1579,13 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis } else { y = execute(a[1]->nnext); u = atan2(getfval(x), getfval(y)); - tempfree(y); + if (istemp(y)) + tfree(y); nextarg = nextarg->nnext; } break; case FSYSTEM: - fflush(stdout); /* in case something is buffered already */ + Bflush(&stdout); /* in case something is buffered already */ u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */ break; case FRAND: @@ -1504,7 +1594,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis break; case FSRAND: if (isrec(x)) /* no argument provided */ - u = time((time_t *)0); + u = time(nil); else u = getfval(x); srand((unsigned int) u); @@ -1521,7 +1611,8 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis if (isupper(*p)) *p = tolower(*p); } - tempfree(x); + if (istemp(x)) + tfree(x); x = gettemp(); setsval(x, buf); free(buf); @@ -1530,15 +1621,16 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis if (isrec(x) || strlen(getsval(x)) == 0) { flush_all(); /* fflush() or fflush("") -> all */ u = 0; - } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) - u = EOF; + } else if ((fp = openfile(FFLUSH, getsval(x))) == nil) + u = Beof; else - u = fflush(fp); + u = Bflush(fp); break; case FUTF: wc = (int)getfval(x); - mbc[wctomb(mbc, wc)] = 0; - tempfree(x); + mbc[runetochar(mbc, &wc)] = 0; + if (istemp(x)) + tfree(x); x = gettemp(); setsval(x, mbc); return x; @@ -1546,7 +1638,8 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis FATAL("illegal function type %d", t); break; } - tempfree(x); + if (istemp(x)) + tfree(x); x = gettemp(); setfval(x, u); if (nextarg != 0) { @@ -1557,79 +1650,78 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis return(x); } -Cell *printstat(Node **a, int n) /* print a[0] */ +Cell *printstat(Node **a, int) /* print a[0] */ { int r; Node *x; Cell *y; - FILE *fp; + Biobuf *fp; if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */ - fp = stdout; + fp = &stdout; else fp = redirect(ptoi(a[1]), a[2]); - for (x = a[0]; x != NULL; x = x->nnext) { + for (x = a[0]; x != nil; x = x->nnext) { y = execute(x); - fputs(getsval(y), fp); - tempfree(y); - if (x->nnext == NULL) - r = fputs(*ORS, fp); + Bwrite(fp, getsval(y), strlen(getsval(y))); + if (istemp(y)) + tfree(y); + if (x->nnext == nil) + r = Bprint(fp, "%s", *ORS); else - r = fputs(*OFS, fp); - if (r == EOF) + r = Bprint(fp, "%s", *OFS); + if (r < 0) FATAL("write error on %s", filename(fp)); } - if (a[1] != 0) - if (fflush(fp) == EOF) - FATAL("write error on %s", filename(fp)); + if (Bflush(fp) < 0) + FATAL("write error on %s", filename(fp)); return(True); } -Cell *nullproc(Node **a, int n) +Cell *nullproc(Node **, int) { - n = n; - a = a; return 0; } -FILE *redirect(int a, Node *b) /* set up all i/o redirections */ +Biobuf *redirect(int a, Node *b) /* set up all i/o redirections */ { - FILE *fp; + Biobuf *fp; Cell *x; char *fname; x = execute(b); fname = getsval(x); fp = openfile(a, fname); - if (fp == NULL) + if (fp == nil) FATAL("can't open file %s", fname); - tempfree(x); + if (istemp(x)) + tfree(x); return fp; } struct files { - FILE *fp; + Biobuf *fp; char *fname; int mode; /* '|', 'a', 'w' => LE/LT, GT */ } files[FOPEN_MAX] ={ - { NULL, "/dev/stdin", LT }, /* watch out: don't free this! */ - { NULL, "/dev/stdout", GT }, - { NULL, "/dev/stderr", GT } + { nil, "/dev/stdin", LT }, /* watch out: don't free this! */ + { nil, "/dev/stdout", GT }, + { nil, "/dev/stderr", GT } }; void stdinit(void) /* in case stdin, etc., are not constants */ { - files[0].fp = stdin; - files[1].fp = stdout; - files[2].fp = stderr; + files[0].fp = &stdin; + files[1].fp = &stdout; + files[2].fp = &stderr; } -FILE *openfile(int a, char *us) +Biobuf *openfile(int a, char *us) { char *s = us; int i, m; - FILE *fp = 0; + Biobuf *fp = nil; if (*s == '\0') FATAL("null file name in print or getline"); @@ -1641,29 +1733,30 @@ FILE *openfile(int a, char *us) return files[i].fp; } if (a == FFLUSH) /* didn't find it, so don't create it! */ - return NULL; + return nil; for (i=0; i < FOPEN_MAX; i++) if (files[i].fp == 0) break; if (i >= FOPEN_MAX) FATAL("%s makes too many open files", s); - fflush(stdout); /* force a semblance of order */ + Bflush(&stdout); /* force a semblance of order */ m = a; if (a == GT) { - fp = fopen(s, "w"); + fp = Bopen(s, OWRITE); } else if (a == APPEND) { - fp = fopen(s, "a"); + fp = Bopen(s, OWRITE); + Bseek(fp, 0LL, 2); m = GT; /* so can mix > and >> */ } else if (a == '|') { /* output pipe */ - fp = popen(s, "w"); + fp = popen(s, OWRITE); } else if (a == LE) { /* input pipe */ - fp = popen(s, "r"); + fp = popen(s, OREAD); } else if (a == LT) { /* getline sval, files[i].fname) == 0) { - if (ferror(files[i].fp)) - WARNING( "i/o error occurred on %s", files[i].fname ); if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else - stat = fclose(files[i].fp); - if (stat == EOF) + stat = Bterm(files[i].fp); + if (stat == Beof) WARNING( "i/o error occurred closing %s", files[i].fname ); if (i > 2) /* don't do /dev/std... */ xfree(files[i].fname); - files[i].fname = NULL; /* watch out for ref thru this */ - files[i].fp = NULL; + files[i].fname = nil; /* watch out for ref thru this */ + files[i].fp = nil; } - tempfree(x); + if (istemp(x)) + tfree(x); return(True); } @@ -1714,13 +1805,11 @@ void closeall(void) for (i = 0; i < FOPEN_MAX; i++) if (files[i].fp) { - if (ferror(files[i].fp)) - WARNING( "i/o error occurred on %s", files[i].fname ); if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else - stat = fclose(files[i].fp); - if (stat == EOF) + stat = Bterm(files[i].fp); + if (stat < -1) WARNING( "i/o error occurred while closing %s", files[i].fname ); } } @@ -1731,12 +1820,12 @@ void flush_all(void) for (i = 0; i < FOPEN_MAX; i++) if (files[i].fp) - fflush(files[i].fp); + Bflush(files[i].fp); } void backsub(char **pb_ptr, char **sptr_ptr); -Cell *sub(Node **a, int nnn) /* substitute command */ +Cell *sub(Node **a, int) /* substitute command */ { char *sptr, *pb, *q; Cell *x, *y, *result; @@ -1744,7 +1833,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ void *p; int bufsz = recsize; - if ((buf = (char *) malloc(bufsz)) == NULL) + if ((buf = (char *) malloc(bufsz)) == nil) FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); @@ -1753,7 +1842,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */ else { y = execute(a[1]); p = compre(getsval(y)); - tempfree(y); + if (istemp(y)) + tfree(y); } y = execute(a[2]); /* replacement string */ result = False; @@ -1790,22 +1880,24 @@ Cell *sub(Node **a, int nnn) /* substitute command */ setsval(x, buf); /* BUG: should be able to avoid copy */ result = True;; } - tempfree(x); - tempfree(y); + if (istemp(x)) + tfree(x); + if (istemp(y)) + tfree(y); free(buf); return result; } -Cell *gsub(Node **a, int nnn) /* global substitute */ +Cell *gsub(Node **a, int) /* global substitute */ { Cell *x, *y; - char *rptr, *sptr, *t, *pb, *c; + char *rptr, *sptr, *t, *pb, *c, *s; char *buf; void *p; int mflag, num; int bufsz = recsize; - if ((buf = (char *)malloc(bufsz)) == NULL) + if ((buf = (char *)malloc(bufsz)) == nil) FATAL("out of memory in gsub"); mflag = 0; /* if mflag == 0, can replace empty string */ num = 0; @@ -1815,8 +1907,10 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ p = (void *) a[1]; /* regular expression */ else { y = execute(a[1]); - p = compre(getsval(y)); - tempfree(y); + s = getsval(y); + p = compre(s); + if (istemp(y)) + tfree(y); } y = execute(a[2]); /* replacement string */ if (pmatch(p, t, c)) { @@ -1886,8 +1980,10 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb = '\0'; setsval(x, buf); /* BUG: should be able to avoid copy + free */ } - tempfree(x); - tempfree(y); + if (istemp(x)) + tfree(x); + if (istemp(y)) + tfree(y); x = gettemp(); x->tval = NUM; x->fval = num; diff --git a/sys/src/cmd/awk/tran.c b/sys/src/cmd/awk/tran.c index 48c7b8861..11a74c818 100644 --- a/sys/src/cmd/awk/tran.c +++ b/sys/src/cmd/awk/tran.c @@ -22,12 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -#define DEBUG -#include -#include +#include +#include #include -#include -#include +#include #include "awk.h" #include "y.tab.h" @@ -46,7 +44,7 @@ Awkfloat *NF; /* number of fields in current record */ Awkfloat *NR; /* number of current record */ Awkfloat *FNR; /* number of current record in current file */ char **FILENAME; /* current filename argument */ -Awkfloat *ARGC; /* number of arguments from command line */ +Awkfloat *AARGC; /* number of arguments from command line */ char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ Awkfloat *RLENGTH; /* length of same */ @@ -101,12 +99,12 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */ int i; char temp[50]; - ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; + AARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; cp = setsymtab("ARGV", "", 0.0, ARR, symtab); ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ cp->sval = (char *) ARGVtab; for (i = 0; i < ac; i++) { - sprintf(temp, "%d", i); + sprint(temp, "%d", i); if (is_number(*av)) setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); else @@ -124,7 +122,7 @@ void envinit(char **envp) /* set up ENVIRON variable */ ENVtab = makesymtab(NSYMTAB); cp->sval = (char *) ENVtab; for ( ; *envp; envp++) { - if ((p = strchr(*envp, '=')) == NULL) + if ((p = strchr(*envp, '=')) == nil) continue; *p++ = 0; /* split into two strings at = */ if (is_number(p)) @@ -142,9 +140,9 @@ Array *makesymtab(int n) /* make a new symbol table */ ap = (Array *) malloc(sizeof(Array)); tp = (Cell **) calloc(n, sizeof(Cell *)); - if (ap == NULL || tp == NULL) + if (ap == nil || tp == nil) FATAL("out of space in makesymtab"); - ap->nelem = 0; + ap->nelemt = 0; ap->size = n; ap->tab = tp; return(ap); @@ -159,10 +157,10 @@ void freesymtab(Cell *ap) /* free a symbol table */ if (!isarr(ap)) return; tp = (Array *) ap->sval; - if (tp == NULL) + if (tp == nil) return; for (i = 0; i < tp->size; i++) { - for (cp = tp->tab[i]; cp != NULL; cp = temp) { + for (cp = tp->tab[i]; cp != nil; cp = temp) { xfree(cp->nval); if (freeable(cp)) xfree(cp->sval); @@ -178,14 +176,14 @@ void freesymtab(Cell *ap) /* free a symbol table */ void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */ { Array *tp; - Cell *p, *prev = NULL; + Cell *p, *prev = nil; int h; tp = (Array *) ap->sval; h = hash(s, tp->size); - for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) + for (p = tp->tab[h]; p != nil; prev = p, p = p->cnext) if (strcmp(s, p->nval) == 0) { - if (prev == NULL) /* 1st one */ + if (prev == nil) /* 1st one */ tp->tab[h] = p->cnext; else /* middle somewhere */ prev->cnext = p->cnext; @@ -193,7 +191,7 @@ void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */ xfree(p->sval); free(p->nval); free(p); - tp->nelem--; + tp->nelemt--; return; } } @@ -203,13 +201,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp) int h; Cell *p; - if (n != NULL && (p = lookup(n, tp)) != NULL) { - dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", + if (n != nil && (p = lookup(n, tp)) != nil) { + dprint( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", p, p->nval, p->sval, p->fval, p->tval) ); return(p); } p = (Cell *) malloc(sizeof(Cell)); - if (p == NULL) + if (p == nil) FATAL("out of space for symbol table at %s", n); p->nval = tostring(n); p->sval = s ? tostring(s) : tostring(""); @@ -217,13 +215,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp) p->tval = t; p->csub = CUNK; p->ctype = OCELL; - tp->nelem++; - if (tp->nelem > FULLTAB * tp->size) + tp->nelemt++; + if (tp->nelemt > FULLTAB * tp->size) rehash(tp); h = hash(n, tp->size); p->cnext = tp->tab[h]; tp->tab[h] = p; - dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", + dprint( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", p, p->nval, p->sval, p->fval, p->tval) ); return(p); } @@ -244,7 +242,7 @@ void rehash(Array *tp) /* rehash items in small table into big one */ nsz = GROWTAB * tp->size; np = (Cell **) calloc(nsz, sizeof(Cell *)); - if (np == NULL) /* can't do it, but can keep running. */ + if (np == nil) /* can't do it, but can keep running. */ return; /* someone else will run out later. */ for (i = 0; i < tp->size; i++) { for (cp = tp->tab[i]; cp; cp = op) { @@ -265,10 +263,10 @@ Cell *lookup(char *s, Array *tp) /* look for s in tp */ int h; h = hash(s, tp->size); - for (p = tp->tab[h]; p != NULL; p = p->cnext) + for (p = tp->tab[h]; p != nil; p = p->cnext) if (strcmp(s, p->nval) == 0) return(p); /* found it */ - return(NULL); /* not found */ + return(nil); /* not found */ } Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ @@ -282,7 +280,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ fldno = atoi(vp->nval); if (fldno > *NF) newfld(fldno); - dprintf( ("setting field %d to %g\n", fldno, f) ); + dprint( ("setting field %d to %g\n", fldno, f) ); } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; @@ -291,7 +289,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ xfree(vp->sval); /* free any previous string */ vp->tval &= ~STR; /* mark string invalid */ vp->tval |= NUM; /* mark number ok */ - dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) ); + dprint( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) ); return vp->fval = f; } @@ -310,7 +308,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */ char *t; int fldno; - dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) ); + dprint( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) ); if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { @@ -318,7 +316,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */ fldno = atoi(vp->nval); if (fldno > *NF) newfld(fldno); - dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) ); + dprint( ("setting field %d to %s (%p)\n", fldno, s, s) ); } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; @@ -329,7 +327,7 @@ char *setsval(Cell *vp, char *s) /* set string val of a Cell */ if (freeable(vp)) xfree(vp->sval); vp->tval &= ~DONTFREE; - dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) ); + dprint( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) ); return(vp->sval = t); } @@ -346,7 +344,7 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */ if (is_number(vp->sval) && !(vp->tval&CON)) vp->tval |= NUM; /* make NUM only sparingly */ } - dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) ); + dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) ); return(vp->fval); } @@ -365,14 +363,14 @@ char *getsval(Cell *vp) /* get string val of a Cell */ if (freeable(vp)) xfree(vp->sval); if (modf(vp->fval, &dtemp) == 0) /* it's integral */ - sprintf(s, "%.30g", vp->fval); + sprint(s, "%.30g", vp->fval); else - sprintf(s, *CONVFMT, vp->fval); + sprint(s, *CONVFMT, vp->fval); vp->sval = tostring(s); vp->tval &= ~DONTFREE; vp->tval |= STR; } - dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) ); + dprint( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) ); return(vp->sval); } @@ -381,7 +379,7 @@ char *tostring(char *s) /* make a copy of string s */ char *p; p = (char *) malloc(strlen(s)+1); - if (p == NULL) + if (p == nil) FATAL("out of space in tostring on %s", s); strcpy(p, s); return(p); @@ -393,7 +391,7 @@ char *qstring(char *s, int delim) /* collect string up to next delim */ int c, n; char *buf, *bp; - if ((buf = (char *) malloc(strlen(s)+3)) == NULL) + if ((buf = (char *) malloc(strlen(s)+3)) == nil) FATAL( "out of space in qstring(%s)", s); for (bp = buf; (c = *s) != delim; s++) { if (c == '\n') @@ -429,6 +427,6 @@ char *qstring(char *s, int delim) /* collect string up to next delim */ } } } - *bp++ = 0; + *bp = 0; return buf; } diff --git a/sys/src/cmd/upas/bayes/addhash.c b/sys/src/cmd/upas/bayes/addhash.c index 91fb90dc3..b3f843ac3 100644 --- a/sys/src/cmd/upas/bayes/addhash.c +++ b/sys/src/cmd/upas/bayes/addhash.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include "regexp.h" #include "hash.h" Hash hash; diff --git a/sys/src/cmd/upas/bayes/bayes.c b/sys/src/cmd/upas/bayes/bayes.c index a04042906..e4ebad736 100644 --- a/sys/src/cmd/upas/bayes/bayes.c +++ b/sys/src/cmd/upas/bayes/bayes.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include "regexp.h" #include "hash.h" enum diff --git a/sys/src/cmd/upas/bayes/dfa.c b/sys/src/cmd/upas/bayes/dfa.c index 533d5958a..9c64c8cc2 100644 --- a/sys/src/cmd/upas/bayes/dfa.c +++ b/sys/src/cmd/upas/bayes/dfa.c @@ -2,8 +2,8 @@ #include #include #include -#include -#include "/sys/src/libregexp/regcomp.h" +#include "regexp.h" +#include "regcomp.h" #include "dfa.h" void rdump(Reprog*); diff --git a/sys/src/cmd/upas/bayes/dump.c b/sys/src/cmd/upas/bayes/dump.c index 9c7babd35..3753a9095 100644 --- a/sys/src/cmd/upas/bayes/dump.c +++ b/sys/src/cmd/upas/bayes/dump.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include "regexp.h" #include "/sys/src/libregexp/regcomp.h" #include "dfa.h" diff --git a/sys/src/cmd/upas/bayes/msgtok.c b/sys/src/cmd/upas/bayes/msgtok.c index cbba30dc8..9f640df04 100644 --- a/sys/src/cmd/upas/bayes/msgtok.c +++ b/sys/src/cmd/upas/bayes/msgtok.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include "regexp.h" #include #include "dfa.h" diff --git a/sys/src/cmd/upas/bayes/regcomp.c b/sys/src/cmd/upas/bayes/regcomp.c index 8afaf2157..1ef57a4cb 100644 --- a/sys/src/cmd/upas/bayes/regcomp.c +++ b/sys/src/cmd/upas/bayes/regcomp.c @@ -4,7 +4,7 @@ #include #include #include "regexp.h" -#include "/sys/src/libregexp/regcomp.h" +#include "regcomp.h" #define TRUE 1 #define FALSE 0 diff --git a/sys/src/cmd/upas/bayes/regcomp.h b/sys/src/cmd/upas/bayes/regcomp.h new file mode 100644 index 000000000..402fe7d5c --- /dev/null +++ b/sys/src/cmd/upas/bayes/regcomp.h @@ -0,0 +1,63 @@ +/* + * substitution list + */ +#define NSUBEXP 32 +typedef struct Resublist Resublist; +struct Resublist +{ + Resub m[NSUBEXP]; +}; + +/* + * Actions and Tokens (Reinst types) + * + * 02xx are operators, value == precedence + * 03xx are tokens, i.e. operands for operators + */ +#define RUNE 0177 +#define OPERATOR 0200 /* Bitmask of all operators */ +#define START 0200 /* Start, used for marker on stack */ +#define RBRA 0201 /* Right bracket, ) */ +#define LBRA 0202 /* Left bracket, ( */ +#define OR 0203 /* Alternation, | */ +#define CAT 0204 /* Concatentation, implicit operator */ +#define STAR 0205 /* Closure, * */ +#define PLUS 0206 /* a+ == aa* */ +#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ +#define ANY 0300 /* Any character except newline, . */ +#define ANYNL 0301 /* Any character including newline, . */ +#define NOP 0302 /* No operation, internal use only */ +#define BOL 0303 /* Beginning of line, ^ */ +#define EOL 0304 /* End of line, $ */ +#define CCLASS 0305 /* Character class, [] */ +#define NCCLASS 0306 /* Negated character class, [] */ +#define END 0377 /* Terminate: match found */ + +/* + * regexec execution lists + */ +#define LISTSIZE 10 +#define BIGLISTSIZE (25*LISTSIZE) +typedef struct Relist Relist; +struct Relist +{ + Reinst* inst; /* Reinstruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +}; +typedef struct Reljunk Reljunk; +struct Reljunk +{ + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + Rune startchar; + char* starts; + char* eol; + Rune* rstarts; + Rune* reol; +}; + +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/sys/src/cmd/upas/bayes/regen.c b/sys/src/cmd/upas/bayes/regen.c index 4f550095c..9bc7bdc85 100644 --- a/sys/src/cmd/upas/bayes/regen.c +++ b/sys/src/cmd/upas/bayes/regen.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include "regexp.h" #include "dfa.h" /*** diff --git a/sys/src/cmd/upas/bayes/regexp.h b/sys/src/cmd/upas/bayes/regexp.h new file mode 100644 index 000000000..780dc8001 --- /dev/null +++ b/sys/src/cmd/upas/bayes/regexp.h @@ -0,0 +1,66 @@ +#pragma src "/sys/src/oldlibregexp" +#pragma lib "oldlibregexp.a" + +typedef struct Resub Resub; +typedef struct Reclass Reclass; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; + +/* + * Sub expression matches + */ +struct Resub{ + union + { + char *sp; + Rune *rsp; + }; + union + { + char *ep; + Rune *rep; + }; +}; + +/* + * character class, each pair of rune's defines a range + */ +struct Reclass{ + Rune *end; + Rune spans[64]; +}; + +/* + * Machine instructions + */ +struct Reinst{ + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }; +}; + +/* + * Reprogram definition + */ +struct Reprog{ + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +}; + +extern Reprog *regcomp(char*); +extern Reprog *regcomplit(char*); +extern Reprog *regcompnl(char*); +extern void regerror(char*); +extern int regexec(Reprog*, char*, Resub*, int); +extern void regsub(char*, char*, int, Resub*, int); +extern int rregexec(Reprog*, Rune*, Resub*, int); +extern void rregsub(Rune*, Rune*, int, Resub*, int); diff --git a/sys/src/libregexp/mkfile b/sys/src/libregexp/mkfile index dcc457028..1b5cc7ddf 100644 --- a/sys/src/libregexp/mkfile +++ b/sys/src/libregexp/mkfile @@ -6,12 +6,12 @@ OFILES=\ regerror.$O\ regexec.$O\ regsub.$O\ - regaux.$O\ rregexec.$O\ rregsub.$O\ + regprint.$O\ HFILES=/sys/include/regexp.h\ - regcomp.h\ + regimpl.h\ UPDATE=\ mkfile\ @@ -20,9 +20,3 @@ UPDATE=\ ${LIB:/$objtype/%=/386/%}\ -#include -#include "regexp.h" -#include "regcomp.h" - - -/* - * save a new match in mp - */ -extern void -_renewmatch(Resub *mp, int ms, Resublist *sp) -{ - int i; - - if(mp==0 || ms<=0) - return; - if(mp[0].sp==0 || sp->m[0].spm[0].sp==mp[0].sp && sp->m[0].ep>mp[0].ep)){ - for(i=0; im[i]; - for(; iinst; p++){ - if(p->inst == ip){ - if(sep->m[0].sp < p->se.m[0].sp){ - if(ms > 1) - p->se = *sep; - else - p->se.m[0] = sep->m[0]; - } - return 0; - } - } - p->inst = ip; - if(ms > 1) - p->se = *sep; - else - p->se.m[0] = sep->m[0]; - (++p)->inst = 0; - return p; -} - -/* - * same as renewthread, but called with - * initial empty start pointer. - */ -extern Relist* -_renewemptythread(Relist *lp, /* _relist to add to */ - Reinst *ip, /* instruction to add */ - int ms, - char *sp) /* pointers to subexpressions */ -{ - Relist *p; - - for(p=lp; p->inst; p++){ - if(p->inst == ip){ - if(sp < p->se.m[0].sp) { - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].sp = sp; - } - return 0; - } - } - p->inst = ip; - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].sp = sp; - (++p)->inst = 0; - return p; -} - -extern Relist* -_rrenewemptythread(Relist *lp, /* _relist to add to */ - Reinst *ip, /* instruction to add */ - int ms, - Rune *rsp) /* pointers to subexpressions */ -{ - Relist *p; - - for(p=lp; p->inst; p++){ - if(p->inst == ip){ - if(rsp < p->se.m[0].rsp) { - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].rsp = rsp; - } - return 0; - } - } - p->inst = ip; - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].rsp = rsp; - (++p)->inst = 0; - return p; -} diff --git a/sys/src/libregexp/regcomp.c b/sys/src/libregexp/regcomp.c index e48ef15ac..c937ee5c9 100644 --- a/sys/src/libregexp/regcomp.c +++ b/sys/src/libregexp/regcomp.c @@ -1,567 +1,576 @@ #include #include -#include "regexp.h" -#include "regcomp.h" - -#define TRUE 1 -#define FALSE 0 - -/* - * Parser Information - */ -typedef -struct Node -{ - Reinst* first; - Reinst* last; -}Node; - -/* max character classes per program is nelem(reprog->class) */ -static Reprog *reprog; - -/* max rune ranges per character class is nelem(classp->spans)/2 */ -#define NCCRUNE nelem(classp->spans) - -#define NSTACK 20 -static Node andstack[NSTACK]; -static Node *andp; -static int atorstack[NSTACK]; -static int* atorp; -static int cursubid; /* id of current subexpression */ -static int subidstack[NSTACK]; /* parallel to atorstack */ -static int* subidp; -static int lastwasand; /* Last token was operand */ -static int nbra; -static char* exprp; /* pointer to next character in source expression */ -static int lexdone; -static int nclass; -static Reclass*classp; -static Reinst* freep; -static int errors; -static Rune yyrune; /* last lex'd rune */ -static Reclass*yyclassp; /* last lex'd class */ - -/* predeclared crap */ -static void operator(int); -static void pushand(Reinst*, Reinst*); -static void pushator(int); -static void evaluntil(int); -static int bldcclass(void); - -static jmp_buf regkaboom; - -static void -rcerror(char *s) +#include +#include "regimpl.h" + +int instrcnt[] = { + [TANY] 2, + [TBOL] 1, + [TCAT] 0, + [TCLASS] 1, + [TEOL] 1, + [TNOTNL] 1, + [TOR] 2, + [TPLUS] 1, + [TQUES] 1, + [TRUNE] 1, + [TSTAR] 2, + [TSUB] 2 +}; + +static Renode* +node(Parselex *plex, int op, Renode *l, Renode *r) { - errors++; - regerror(s); - longjmp(regkaboom, 1); + Renode *n; + + plex->instrs += instrcnt[op]; + n = plex->next++; + n->op = op; + n->left = l; + n->right = r; + return n; } -static Reinst* -newinst(int t) +static Renode* +e3(Parselex *plex) { - freep->type = t; - freep->left = 0; - freep->right = 0; - return freep++; + char error[128]; + Renode *n; + + switch(lex(plex)) { + case LANY: + return node(plex, TANY, nil, nil); + case LBOL: + return node(plex, TBOL, nil, nil); + case LEOL: + return node(plex, TEOL, nil, nil); + case LRUNE: + n = node(plex, TRUNE, nil, nil); + n->r = plex->rune; + return n; + case LCLASS: + if(plex->nc) + return buildclassn(plex); + return buildclass(plex); + case LLPAR: + n = e0(plex); + n = node(plex, TSUB, n, nil); + if(lex(plex) != LRPAR) { + snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig); + regerror(error); + longjmp(plex->exitenv, 1); + } + return n; + default: + if(plex->rune) + snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune); + else + snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig); + regerror(error); + longjmp(plex->exitenv, 1); + } + return nil; } -static void -operand(int t) +static Renode* +e2(Parselex *plex) { - Reinst *i; - - if(lastwasand) - operator(CAT); /* catenate is implicit */ - i = newinst(t); - - if(t == CCLASS || t == NCCLASS) - i->cp = yyclassp; - if(t == RUNE) - i->r = yyrune; - - pushand(i, i); - lastwasand = TRUE; + Renode *n; + + n = e3(plex); + if(lex(plex) == LREP) { + switch(plex->rune) { + case L'*': + return node(plex, TSTAR, n, nil); + case L'+': + return node(plex, TPLUS, n, nil); + case L'?': + return node(plex, TQUES, n, nil); + } + } + plex->peek = 1; + return n; } -static void -operator(int t) +static Renode* +invert(Renode *n) { - if(t==RBRA && --nbra<0) - rcerror("unmatched right paren"); - if(t==LBRA){ - if(++cursubid >= NSUBEXP) - rcerror ("too many subexpressions"); - nbra++; - if(lastwasand) - operator(CAT); - } else - evaluntil(t); - if(t != RBRA) - pushator(t); - lastwasand = FALSE; - if(t==STAR || t==QUEST || t==PLUS || t==RBRA) - lastwasand = TRUE; /* these look like operands */ + Renode *n1; + + if(n->op != TCAT) + return n; + while(n->left->op == TCAT) { + n1 = n->left; + n->left = n1->right; + n1->right = n; + n = n1; + } + return n; } -static void -regerr2(char *s, int c) +static Renode* +e1(Parselex *plex) { - char buf[100]; - char *cp = buf; - while(*s) - *cp++ = *s++; - *cp++ = c; - *cp = '\0'; - rcerror(buf); + Renode *n; + int sym; + + n = e2(plex); + for(;;) { + sym = lex(plex); + if(sym == LEND || sym == LOR || sym == LRPAR) + break; + plex->peek = 1; + n = node(plex, TCAT, n, e2(plex)); + } + plex->peek = 1; + return invert(n); } -static void -cant(char *s) +static Renode* +e0(Parselex *plex) { - char buf[100]; - strcpy(buf, "can't happen: "); - strcat(buf, s); - rcerror(buf); + Renode *n; + + n = e1(plex); + for(;;) { + if(lex(plex) != LOR) + break; + n = node(plex, TOR, n, e1(plex)); + } + plex->peek = 1; + return n; } -static void -pushand(Reinst *f, Reinst *l) +static Parselex* +initplex(Parselex *plex, char *regstr, int lit) { - if(andp >= &andstack[NSTACK]) - cant("operand stack overflow"); - andp->first = f; - andp->last = l; - andp++; + plex->getnextr = lit ? getnextrlit : getnextr; + plex->rawexp = plex->orig = regstr; + plex->sub = 0; + plex->instrs = 0; + plex->peek = 0; + plex->done = 0; + return plex; } -static void -pushator(int t) +static int +maxthreads(Renode *tree) { - if(atorp >= &atorstack[NSTACK]) - cant("operator stack overflow"); - *atorp++ = t; - *subidp++ = cursubid; + tree = tree->left; + if(tree->op == TCAT) + tree = tree->left; + if(tree->op == TBOL) + return 2; + return -1; } -static Node* -popand(int op) +static Reprog* +regcomp1(char *regstr, int nl, int lit) { - Reinst *inst; - - if(andp <= &andstack[0]){ - regerr2("missing operand for ", op); - inst = newinst(NOP); - pushand(inst,inst); + Reprog *reprog; + Parselex plex; + Renode *parsetr; + int regstrlen, maxthr; + + regstrlen = utflen(regstr); + initplex(&plex, regstr, lit); + plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2); + if(plex.nodes == nil) + return nil; + plex.next = plex.nodes; + + if(setjmp(plex.exitenv) != 0) { + free(plex.nodes); + return nil; } - return --andp; + + maxthr = regstrlen; + parsetr = node(&plex, TSUB, e0(&plex), nil); + +// prtree(parsetr, 0, 1); + reprog = malloc(sizeof(Reprog) + + sizeof(Reinst) * plex.instrs + + sizeof(Rethread) * maxthr); + reprog->len = plex.instrs; + reprog->nthr = maxthr; + reprog->startinst = compile(parsetr, reprog, nl); + reprog->threads = (Rethread*)(reprog->startinst + reprog->len); + reprog->regstr = regstr; + + free(plex.nodes); + return reprog; } -static int -popator(void) +Reprog* +regcomp(char *str) { - if(atorp <= &atorstack[0]) - cant("operator stack underflow"); - --subidp; - return *--atorp; + return regcomp1(str, 0, 0); } -static void -evaluntil(int pri) +Reprog* +regcomplit(char *str) { - Node *op1, *op2; - Reinst *inst1, *inst2; - - while(pri==RBRA || atorp[-1]>=pri){ - switch(popator()){ - default: - rcerror("unknown operator in evaluntil"); - break; - case LBRA: /* must have been RBRA */ - op1 = popand('('); - inst2 = newinst(RBRA); - inst2->subid = *subidp; - op1->last->next = inst2; - inst1 = newinst(LBRA); - inst1->subid = *subidp; - inst1->next = op1->first; - pushand(inst1, inst2); - return; - case OR: - op2 = popand('|'); - op1 = popand('|'); - inst2 = newinst(NOP); - op2->last->next = inst2; - op1->last->next = inst2; - inst1 = newinst(OR); - inst1->right = op1->first; - inst1->left = op2->first; - pushand(inst1, inst2); - break; - case CAT: - op2 = popand(0); - op1 = popand(0); - op1->last->next = op2->first; - pushand(op1->first, op2->last); - break; - case STAR: - op2 = popand('*'); - inst1 = newinst(OR); - op2->last->next = inst1; - inst1->right = op2->first; - pushand(inst1, inst1); - break; - case PLUS: - op2 = popand('+'); - inst1 = newinst(OR); - op2->last->next = inst1; - inst1->right = op2->first; - pushand(op2->first, inst1); - break; - case QUEST: - op2 = popand('?'); - inst1 = newinst(OR); - inst2 = newinst(NOP); - inst1->left = inst2; - inst1->right = op2->first; - op2->last->next = inst2; - pushand(inst1, inst2); - break; - } - } + return regcomp1(str, 0, 1); } -static Reprog* -optimize(Reprog *pp) +Reprog* +regcompnl(char *str) { - Reinst *inst, *target; - int size; - Reprog *npp; - Reclass *cl; - int diff; - - /* - * get rid of NOOP chains - */ - for(inst=pp->firstinst; inst->type!=END; inst++){ - target = inst->next; - while(target->type == NOP) - target = target->next; - inst->next = target; - } - - /* - * The original allocation is for an area larger than - * necessary. Reallocate to the actual space used - * and then relocate the code. - */ - size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); - npp = realloc(pp, size); - if(npp==0 || npp==pp) - return pp; - diff = (char *)npp - (char *)pp; - freep = (Reinst *)((char *)freep + diff); - for(inst=npp->firstinst; insttype){ - case OR: - case STAR: - case PLUS: - case QUEST: - *(char **)&inst->right += diff; - break; - case CCLASS: - case NCCLASS: - *(char **)&inst->right += diff; - cl = inst->cp; - *(char **)&cl->end += diff; - break; - } - *(char **)&inst->left += diff; - } - *(char **)&npp->startinst += diff; - return npp; + return regcomp1(str, 1, 0); } -#ifdef DEBUG -static void -dumpstack(void){ - Node *stk; - int *ip; - - print("operators\n"); - for(ip=atorstack; ipfirst->type, stk->last->type); +static Reinst* +compile1(Renode *renode, Reinst *reinst, int *sub, int nl) +{ + Reinst *i; + int s; + +Tailcall: + if(renode == nil) + return reinst; + switch(renode->op) { + case TCLASS: + reinst->op = OCLASS; + reinst->r = renode->r; + reinst->r1 = renode->r1; + reinst->a = reinst + 1 + renode->nclass; + renode = renode->left; + reinst++; + goto Tailcall; + case TCAT: + reinst = compile1(renode->left, reinst, sub, nl); + renode = renode->right; + goto Tailcall; + case TOR: + reinst->op = OSPLIT; + reinst->a = reinst + 1; + i = compile1(renode->left, reinst->a, sub, nl); + reinst->b = i + 1; + i->op = OJMP; + i->a = compile1(renode->right, reinst->b, sub, nl); + return i->a; + case TSTAR: + reinst->op = OSPLIT; + reinst->a = reinst + 1; + i = compile1(renode->left, reinst->a, sub, nl); + reinst->b = i + 1; + i->op = OJMP; + i->a = reinst; + return reinst->b; + case TPLUS: + i = reinst; + reinst = compile1(renode->left, reinst, sub, nl); + reinst->op = OSPLIT; + reinst->a = i; + reinst->b = reinst + 1; + return reinst->b; + case TQUES: + reinst->op = OSPLIT; + reinst->a = reinst + 1; + reinst->b = compile1(renode->left, reinst->a, sub, nl); + return reinst->b; + case TSUB: + reinst->op = OSAVE; + reinst->sub = s = (*sub)++; + reinst = compile1(renode->left, reinst+1, sub, nl); + reinst->op = OUNSAVE; + reinst->sub = s; + return reinst + 1; + case TANY: + if(nl == 0) + reinst++->op = ONOTNL; + reinst->op = OANY; + return reinst + 1; + case TRUNE: + reinst->op = ORUNE; + reinst->r = renode->r; + return reinst + 1; + case TNOTNL: + reinst->op = ONOTNL; + return reinst + 1; + case TEOL: + reinst->op = OEOL; + return reinst + 1; + case TBOL: + reinst->op = OBOL; + return reinst + 1; + } + return nil; } -static void -dump(Reprog *pp) +static Reinst* +compile(Renode *parsetr, Reprog *reprog, int nl) { - Reinst *l; - Rune *p; + Reinst *reinst; + int sub; - l = pp->firstinst; - do{ - print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, - l->left-pp->firstinst, l->right-pp->firstinst); - if(l->type == RUNE) - print("\t%C\n", l->r); - else if(l->type == CCLASS || l->type == NCCLASS){ - print("\t["); - if(l->type == NCCLASS) - print("^"); - for(p = l->cp->spans; p < l->cp->end; p += 2) - if(p[0] == p[1]) - print("%C", p[0]); - else - print("%C-%C", p[0], p[1]); - print("]\n"); - } else - print("\n"); - }while(l++->type); + sub = 0; + reinst = (Reinst*)(reprog+1); + compile1(parsetr, reinst, &sub, nl); + return reinst; } -#endif -static Reclass* -newclass(void) +static void +getnextr(Parselex *l) { - if(nclass >= nelem(reprog->class)) - rcerror("too many character classes; increase Reprog.class size"); - return &(classp[nclass++]); + l->literal = 0; + if(l->done) { + l->rune = 0; + return; + } + l->rawexp += chartorune(&l->rune, l->rawexp); + if(l->rune == L'\\') { + l->rawexp += chartorune(&l->rune, l->rawexp); + l->literal = 1; + } + if(*l->rawexp == 0) + l->done = 1; + return; } -static int -nextc(Rune *rp) +static void +getnextrlit(Parselex *l) { - if(lexdone){ - *rp = 0; - return 1; - } - exprp += chartorune(rp, exprp); - if(*rp == L'\\'){ - exprp += chartorune(rp, exprp); - return 1; + l->literal = 1; + if(l->done) { + l->literal = 0; + l->rune = 0; + return; } - if(*rp == 0) - lexdone = 1; - return 0; + l->rawexp += chartorune(&l->rune, l->rawexp); + if(*l->rawexp == 0) + l->done = 1; + return; } -static int -lex(int literal, int dot_type) +static int +lex(Parselex *l) { - int quoted; - - quoted = nextc(&yyrune); - if(literal || quoted){ - if(yyrune == 0) - return END; - return RUNE; + if(l->peek) { + l->peek = 0; + return l->peeklex; } - - switch(yyrune){ + l->getnextr(l); + if(l->literal) + return l->peeklex = LRUNE; + switch(l->rune){ case 0: - return END; + return l->peeklex = LEND; case L'*': - return STAR; case L'?': - return QUEST; case L'+': - return PLUS; + return l->peeklex = LREP; case L'|': - return OR; + return l->peeklex = LOR; case L'.': - return dot_type; + return l->peeklex = LANY; case L'(': - return LBRA; + return l->peeklex = LLPAR; case L')': - return RBRA; + return l->peeklex = LRPAR; case L'^': - return BOL; + return l->peeklex = LBOL; case L'$': - return EOL; + return l->peeklex = LEOL; case L'[': - return bldcclass(); + getclass(l); + return l->peeklex = LCLASS; } - return RUNE; + return l->peeklex = LRUNE; } static int -bldcclass(void) +pcmp(void *va, void *vb) { - int type; - Rune r[NCCRUNE]; - Rune *p, *ep, *np; - Rune rune; - int quoted; - - /* we have already seen the '[' */ - type = CCLASS; - yyclassp = newclass(); - - /* look ahead for negation */ - /* SPECIAL CASE!!! negated classes don't match \n */ - ep = r; - quoted = nextc(&rune); - if(!quoted && rune == L'^'){ - type = NCCLASS; - quoted = nextc(&rune); - *ep++ = L'\n'; - *ep++ = L'\n'; - } - - /* parse class into a set of spans */ - while(ep < &r[NCCRUNE-1]){ - if(rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - if(!quoted && rune == L']') - break; - if(!quoted && rune == L'-'){ - if(ep == r){ - rcerror("malformed '[]'"); - return 0; - } - quoted = nextc(&rune); - if((!quoted && rune == L']') || rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - *(ep-1) = rune; - } else { - *ep++ = rune; - *ep++ = rune; - } - quoted = nextc(&rune); - } - if(ep >= &r[NCCRUNE-1]) { - rcerror("char class too large; increase Reclass.spans size"); - return 0; - } - - /* sort on span start */ - for(p = r; p < ep; p += 2){ - for(np = p; np < ep; np += 2) - if(*np < *p){ - rune = np[0]; - np[0] = p[0]; - p[0] = rune; - rune = np[1]; - np[1] = p[1]; - p[1] = rune; - } - } + vlong n; + Rune *a, *b; - /* merge spans */ - np = yyclassp->spans; - p = r; - if(r == ep) - yyclassp->end = np; - else { - np[0] = *p++; - np[1] = *p++; - for(; p < ep; p += 2) - /* overlapping or adjacent ranges? */ - if(p[0] <= np[1] + 1){ - if(p[1] >= np[1]) - np[1] = p[1]; /* coalesce */ - } else { - np += 2; - np[0] = p[0]; - np[1] = p[1]; - } - yyclassp->end = np+2; - } + a = va; + b = vb; - return type; + n = (vlong)b[0] - (vlong)a[0]; + if(n) + return n; + return (vlong)b[1] - (vlong)a[1]; } -static Reprog* -regcomp1(char *s, int literal, int dot_type) +static void +getclass(Parselex *l) { - int token; - Reprog *pp; - - /* get memory for the program */ - pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); - if(pp == 0){ - regerror("out of memory"); - return 0; + Rune *p, *q, t; + + l->nc = 0; + getnextrlit(l); + if(l->rune == L'^') { + l->nc = 1; + getnextrlit(l); } - freep = pp->firstinst; - classp = pp->class; - errors = 0; - - if(setjmp(regkaboom)) - goto out; - - /* go compile the sucker */ - lexdone = 0; - exprp = s; - nclass = 0; - nbra = 0; - atorp = atorstack; - andp = andstack; - subidp = subidstack; - lastwasand = FALSE; - cursubid = 0; - - /* Start with a low priority operator to prime parser */ - pushator(START-1); - while((token = lex(literal, dot_type)) != END){ - if((token&0300) == OPERATOR) - operator(token); - else - operand(token); + p = l->cpairs; + for(;;) { + *p = l->rune; + if(l->rune == L']') + break; + if(l->rune == L'-') { + regerror("Malformed class"); + longjmp(l->exitenv, 1); + } + if(l->rune == '\\') { + getnextrlit(l); + *p = l->rune; + } + if(l->rune == 0) { + regerror("No closing ] for class"); + longjmp(l->exitenv, 1); + } + getnextrlit(l); + if(l->rune == L'-') { + getnextrlit(l); + if(l->rune == L']') { + regerror("Malformed class"); + longjmp(l->exitenv, 1); + } + if(l->rune == L'-') { + regerror("Malformed class"); + longjmp(l->exitenv, 1); + } + if(l->rune == L'\\') + getnextrlit(l); + p[1] = l->rune; + if(p[0] > p[1]) { + t = p[0]; + p[0] = p[1]; + p[1] = t; + } + getnextrlit(l); + } else + p[1] = p[0]; + if(p >= l->cpairs + nelem(l->cpairs) - 2) { + regerror("Class too big\n"); + longjmp(l->exitenv, 1); + } + p += 2; } - - /* Close with a low priority operator */ - evaluntil(START); - - /* Force END */ - operand(END); - evaluntil(START); -#ifdef DEBUG - dumpstack(); -#endif - if(nbra) - rcerror("unmatched left paren"); - --andp; /* points to first and only operand */ - pp->startinst = andp->first; -#ifdef DEBUG - dump(pp); -#endif - pp = optimize(pp); -#ifdef DEBUG - print("start: %d\n", andp->first-pp->firstinst); - dump(pp); -#endif -out: - if(errors){ - free(pp); - pp = 0; + *p = L'\0'; + qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp); + q = l->cpairs; + for(p = l->cpairs+2; *p != 0; p += 2) { + if(p[1] < q[0] - 1) { + q[2] = p[0]; + q[3] = p[1]; + q += 2; + continue; + } + q[0] = p[0]; + if(p[1] > q[1]) + q[1] = p[1]; } - return pp; + q[2] = 0; } -extern Reprog* -regcomp(char *s) +/* classes are in descending order */ +static Renode* +buildclassn(Parselex *l) { - return regcomp1(s, 0, ANY); + Renode *n; + Rune *p; + int i; + + i = 0; + p = l->cpairs; + n = node(l, TCLASS, nil, nil); + n->r = p[1] + 1; + n->r1 = Runemax; + n->nclass = i++; + + for(; *p != 0; p += 2) { + n = node(l, TCLASS, n, nil); + n->r = p[3] + 1; + n->r1 = p[0] - 1; + n->nclass = i++; + } + n->r = 0; + return node(l, TCAT, node(l, TNOTNL, nil, nil), n); } -extern Reprog* -regcomplit(char *s) +static Renode* +buildclass(Parselex *l) { - return regcomp1(s, 1, ANY); + Renode *n; + Rune *p; + int i; + + i = 0; + n = node(l, TCLASS, nil, nil); + n->r = Runemax + 1; + n->nclass = i++; + + for(p = l->cpairs; *p != 0; p += 2) { + n = node(l, TCLASS, n, nil); + n->r = p[0]; + n->r1 = p[1]; + n->nclass = i++; + } + return n; } -extern Reprog* -regcompnl(char *s) +static void +prtree(Renode *tree, int d, int f) { - return regcomp1(s, 0, ANYNL); + int i; + + if(tree == nil) + return; + if(f) + for(i = 0; i < d; i++) + print("\t"); + switch(tree->op) { + case TCAT: + prtree(tree->left, d, 0); + prtree(tree->right, d, 1); + break; + case TOR: + print("TOR\n"); + prtree(tree->left, d+1, 1); + for(i = 0; i < d; i++) + print("\t"); + print("|\n"); + prtree(tree->right, d+1, 1); + break; + case TSTAR: + print("*\n"); + prtree(tree->left, d+1, 1); + break; + case TPLUS: + print("+\n"); + prtree(tree->left, d+1, 1); + break; + case TQUES: + print("?\n"); + prtree(tree->left, d+1, 1); + break; + case TANY: + print(".\n"); + prtree(tree->left, d+1, 1); + break; + case TBOL: + print("^\n"); + break; + case TEOL: + print("$\n"); + break; + case TSUB: + print("TSUB\n"); + prtree(tree->left, d+1, 1); + break; + case TRUNE: + print("TRUNE: %C\n", tree->r); + break; + case TNOTNL: + print("TNOTNL: !\\n\n"); + break; + case TCLASS: + print("CLASS: %C-%C\n", tree->r, tree->r1); + prtree(tree->left, d, 1); + break; + } } diff --git a/sys/src/libregexp/regcomp.h b/sys/src/libregexp/regcomp.h deleted file mode 100644 index 402fe7d5c..000000000 --- a/sys/src/libregexp/regcomp.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * substitution list - */ -#define NSUBEXP 32 -typedef struct Resublist Resublist; -struct Resublist -{ - Resub m[NSUBEXP]; -}; - -/* - * Actions and Tokens (Reinst types) - * - * 02xx are operators, value == precedence - * 03xx are tokens, i.e. operands for operators - */ -#define RUNE 0177 -#define OPERATOR 0200 /* Bitmask of all operators */ -#define START 0200 /* Start, used for marker on stack */ -#define RBRA 0201 /* Right bracket, ) */ -#define LBRA 0202 /* Left bracket, ( */ -#define OR 0203 /* Alternation, | */ -#define CAT 0204 /* Concatentation, implicit operator */ -#define STAR 0205 /* Closure, * */ -#define PLUS 0206 /* a+ == aa* */ -#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ -#define ANY 0300 /* Any character except newline, . */ -#define ANYNL 0301 /* Any character including newline, . */ -#define NOP 0302 /* No operation, internal use only */ -#define BOL 0303 /* Beginning of line, ^ */ -#define EOL 0304 /* End of line, $ */ -#define CCLASS 0305 /* Character class, [] */ -#define NCCLASS 0306 /* Negated character class, [] */ -#define END 0377 /* Terminate: match found */ - -/* - * regexec execution lists - */ -#define LISTSIZE 10 -#define BIGLISTSIZE (25*LISTSIZE) -typedef struct Relist Relist; -struct Relist -{ - Reinst* inst; /* Reinstruction of the thread */ - Resublist se; /* matched subexpressions in this thread */ -}; -typedef struct Reljunk Reljunk; -struct Reljunk -{ - Relist* relist[2]; - Relist* reliste[2]; - int starttype; - Rune startchar; - char* starts; - char* eol; - Rune* rstarts; - Rune* reol; -}; - -extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); -extern void _renewmatch(Resub*, int, Resublist*); -extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); -extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/sys/src/libregexp/regerror.c b/sys/src/libregexp/regerror.c index 92b73a0d9..1e0eef175 100644 --- a/sys/src/libregexp/regerror.c +++ b/sys/src/libregexp/regerror.c @@ -1,6 +1,6 @@ #include #include -#include "regexp.h" +#include void regerror(char *s) diff --git a/sys/src/libregexp/regexec.c b/sys/src/libregexp/regexec.c index b806ba60a..89501de6a 100644 --- a/sys/src/libregexp/regexec.c +++ b/sys/src/libregexp/regexec.c @@ -1,232 +1,201 @@ #include #include -#include "regexp.h" -#include "regcomp.h" +#include +#include "regimpl.h" +typedef struct RethreadQ RethreadQ; +struct RethreadQ +{ + Rethread *head; + Rethread **tail; +}; -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -regexec1(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j -) +int +regexec(Reprog *prog, char *str, Resub *sem, int msize) { - int flag=0; - Reinst *inst; - Relist *tlp; - char *s; - int i, checkstart; - Rune r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - char *p; + RethreadQ lists[2], *clist, *nlist, *tmp; + Rethread *t, *next, *pooltop, *avail; + Reinst *curinst; + Rune r; + char *sp, *ep, endc; + int i, match, first, gen, matchpri, pri; - match = 0; - checkstart = j->starttype; - if(mp) - for(i=0; irelist[0][0].inst = 0; - j->relist[1][0].inst = 0; + if(msize > NSUBEXPM) + msize = NSUBEXPM; - /* Execute machine once for each character, including terminal NUL */ - s = j->starts; - do{ - /* fast check for first char */ - if(checkstart) { - switch(j->starttype) { - case RUNE: - p = utfrune(s, j->startchar); - if(p == 0 || s == j->eol) - return match; - s = p; - break; - case BOL: - if(s == bol) - break; - p = utfrune(s, '\n'); - if(p == 0 || s == j->eol) - return match; - s = p+1; - break; - } - } - r = *(uchar*)s; - if(r < Runeself) - n = 1; - else - n = chartorune(&r, s); + if(prog->startinst->gen != 0) { + for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) + curinst->gen = 0; + } - /* switch run lists */ - tl = j->relist[flag]; - tle = j->reliste[flag]; - nl = j->relist[flag^=1]; - nle = j->reliste[flag]; - nl->inst = 0; + clist = lists; + clist->head = nil; + clist->tail = &clist->head; + nlist = lists + 1; + nlist->head = nil; + nlist->tail = &nlist->head; - /* Add first instruction to current list */ - if(match == 0) - _renewemptythread(tl, progp->startinst, ms, s); + pooltop = prog->threads + prog->nthr; + avail = nil; - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - for(inst = tlp->inst; ; inst = inst->next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r == r){ - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - } - break; - case LBRA: - tlp->se.m[inst->subid].sp = s; - continue; - case RBRA: - tlp->se.m[inst->subid].ep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(s == j->eol || r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].ep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; + pri = matchpri = gen = match = 0; + sp = str; + ep = nil; + endc = '\0'; + if(sem != nil && msize > 0) { + if(sem->sp != nil) + sp = sem->sp; + if(sem->ep != nil && *sem->ep != '\0') { + ep = sem->ep; + endc = *sem->ep; + *sem->ep = '\0'; + } + } + r = Runemax + 1; + for(; r != L'\0'; sp += i) { + gen++; + i = chartorune(&r, sp); + first = 1; + t = clist->head; + if(t == nil) + goto Start; + curinst = t->pc; +Again: + if(curinst->gen == gen) + goto Done; + curinst->gen = gen; + switch(curinst->op) { + case ORUNE: + if(r != curinst->r) + goto Done; + case OANY: /* fallthrough */ + Any: + next = t->next; + t->pc = curinst + 1; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(next == nil) + break; + t = next; + curinst = t->pc; + goto Again; + case OCLASS: + Class: + if(r < curinst->r) + goto Done; + if(r > curinst->r1) { + curinst++; + goto Class; + } + next = t->next; + t->pc = curinst->a; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(next == nil) + break; + t = next; + curinst = t->pc; + goto Again; + case ONOTNL: + if(r != L'\n') { + curinst++; + goto Again; + } + goto Done; + case OBOL: + if(sp == str || sp[-1] == '\n') { + curinst++; + goto Again; + } + goto Done; + case OEOL: + if(r == L'\0' && ep == nil) { + curinst++; + goto Again; + } + if(r == L'\n') + goto Any; + goto Done; + case OJMP: + curinst = curinst->a; + goto Again; + case OSPLIT: + if(avail == nil) + next = --pooltop; + else { + next = avail; + avail = avail->next; + } + next->pc = curinst->b; + if(msize > 0) + memcpy(next->sem, t->sem, sizeof(Resub)*msize); + next->pri = t->pri; + next->next = t->next; + t->next = next; + curinst = curinst->a; + goto Again; + case OSAVE: + if(curinst->sub < msize) + t->sem[curinst->sub].sp = sp; + curinst++; + goto Again; + case OUNSAVE: + if(curinst->sub == 0) { + /* "Highest" priority is the left-most longest. */ + if (t->pri > matchpri) + goto Done; + match = 1; + matchpri = t->pri; + if(sem != nil && msize > 0) { + memcpy(sem, t->sem, sizeof(Resub)*msize); + sem->ep = sp; } + goto Done; + } + if(curinst->sub < msize) + t->sem[curinst->sub].ep = sp; + curinst++; + goto Again; + Done: + next = t->next; + t->next = avail; + avail = t; + if(next == nil) break; + t = next; + curinst = t->pc; + goto Again; + } +Start: + /* Start again once if we haven't found anything. */ + if(first == 1 && match == 0) { + first = 0; + if(avail == nil) + t = --pooltop; + else { + t = avail; + avail = avail->next; } + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + /* "Lower" priority thread */ + t->pri = matchpri = pri++; + t->next = nil; + curinst = prog->startinst; + goto Again; } - if(s == j->eol) + /* If we have a match and no extant threads, we are done. */ + if(match == 1 && nlist->head == nil) break; - checkstart = j->starttype && nl->inst==0; - s += n; - }while(r); - return match; -} - -static int -regexec2(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j -) -{ - int rv; - Relist *relist0, *relist1; - - /* mark space */ - relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist0 == nil) - return -1; - relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist1 == nil){ - free(relist1); - return -1; - } - j->relist[0] = relist0; - j->relist[1] = relist1; - j->reliste[0] = relist0 + BIGLISTSIZE - 2; - j->reliste[1] = relist1 + BIGLISTSIZE - 2; - - rv = regexec1(progp, bol, mp, ms, j); - free(relist0); - free(relist1); - return rv; -} - -extern int -regexec(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - Reljunk j; - Relist relist0[LISTSIZE], relist1[LISTSIZE]; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - j.starts = bol; - j.eol = 0; - if(mp && ms>0){ - if(mp->sp) - j.starts = mp->sp; - if(mp->ep) - j.eol = mp->ep; + tmp = clist; + clist = nlist; + nlist = tmp; + nlist->head = nil; + nlist->tail = &nlist->head; } - j.starttype = 0; - j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { - j.starttype = RUNE; - j.startchar = progp->startinst->r; - } - if(progp->startinst->type == BOL) - j.starttype = BOL; - - /* mark space */ - j.relist[0] = relist0; - j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 2; - j.reliste[1] = relist1 + nelem(relist1) - 2; - - rv = regexec1(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - rv = regexec2(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - return -1; + if(ep != nil) + *ep = endc; + return match; } diff --git a/sys/src/libregexp/regimpl.h b/sys/src/libregexp/regimpl.h new file mode 100644 index 000000000..f173a970c --- /dev/null +++ b/sys/src/libregexp/regimpl.h @@ -0,0 +1,104 @@ +enum +{ + LANY = 0, + LBOL, + LCLASS, + LEND, + LEOL, + LLPAR, + LOR, + LREP, + LRPAR, + LRUNE, + + TANY = 0, + TBOL, + TCAT, + TCLASS, + TEOL, + TNOTNL, + TOR, + TPLUS, + TQUES, + TRUNE, + TSTAR, + TSUB, + + NSUBEXPM = 32 +}; + +typedef struct Parselex Parselex; +typedef struct Renode Renode; + +struct Parselex +{ + /* Parse */ + Renode *next; + Renode *nodes; + int sub; + int instrs; + jmp_buf exitenv; + /* Lex */ + void (*getnextr)(Parselex*); + char *rawexp; + char *orig; + Rune rune; + Rune peek; + int peeklex; + int done; + int literal; + Rune cpairs[400+2]; + int nc; +}; +struct Renode +{ + int op; + Renode *left; + Rune r; + union + { + Rune r1; + int sub; + Renode *right; + }; + int nclass; +}; +struct Rethread +{ + Reinst *pc; + Resub sem[NSUBEXPM]; + int pri; + Rethread *next; +}; +struct Reinst +{ + char op; + int gen; + Reinst *a; + union + { + Rune r; + int sub; + }; + union + { + Rune r1; + Reinst *b; + }; +}; + +static int lex(Parselex*); +static void getnextr(Parselex*); +static void getnextrlit(Parselex*); +static void getclass(Parselex*); +static Renode *e0(Parselex*); +static Renode *e1(Parselex*); +static Renode *e2(Parselex*); +static Renode *e3(Parselex*); +static Renode *buildclass(Parselex*); +static Renode *buildclassn(Parselex*); +static int pcmp(void*, void*); +static Reprog *regcomp1(char*, int, int); +static Reinst *compile(Renode*, Reprog*, int); +static Reinst *compile1(Renode*, Reinst*, int*, int); +static void prtree(Renode*, int, int); diff --git a/sys/src/libregexp/regprint.c b/sys/src/libregexp/regprint.c new file mode 100644 index 000000000..673ca1f7b --- /dev/null +++ b/sys/src/libregexp/regprint.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include + +static int +fmtprinst(Fmt *f, Reinst *inst) +{ + int r; + + r = fmtprint(f, "%p ", inst); + switch(inst->op) { + case ORUNE: + r += fmtprint(f, "ORUNE\t%C\n", inst->r); + break; + case ONOTNL: + r += fmtprint(f, "ONOTNL\n"); + break; + case OCLASS: + r += fmtprint(f, "OCLASS\t%C-%C %p\n", inst->r, inst->r1, inst->a); + break; + case OSPLIT: + r += fmtprint(f, "OSPLIT\t%p %p\n", inst->a, inst->b); + break; + case OJMP: + r += fmtprint(f, "OJMP \t%p\n", inst->a); + break; + case OSAVE: + r += fmtprint(f, "OSAVE\t%d\n", inst->sub); + break; + case OUNSAVE: + r += fmtprint(f, "OUNSAVE\t%d\n", inst->sub); + break; + case OANY: + r += fmtprint(f, "OANY \t.\n"); + break; + case OEOL: + r += fmtprint(f, "OEOL \t$\n"); + break; + case OBOL: + r += fmtprint(f, "OBOL \t^\n"); + break; + } + return r; +} + +static int +fmtprprog(Fmt *f, Reprog *reprog) +{ + Reinst *inst; + int r; + + r = 0; + for(inst = reprog->startinst; inst < reprog->startinst + reprog->len; inst++) + r += fmtprinst(f, inst); + return r; +} + +int +reprogfmt(Fmt *f) +{ + Reprog *r; + + r = va_arg(f->args, Reprog*); + return fmtprprog(f, r); +} diff --git a/sys/src/libregexp/regsub.c b/sys/src/libregexp/regsub.c index 2474d2989..d11d86f25 100644 --- a/sys/src/libregexp/regsub.c +++ b/sys/src/libregexp/regsub.c @@ -1,63 +1,66 @@ #include #include -#include "regexp.h" +#include -/* substitute into one string using the matches from the last regexec() */ -extern void -regsub(char *sp, /* source string */ - char *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ +void +regsub(char *src, char *dst, int dlen, Resub *match, int msize) { - char *ssp, *ep; int i; + char *ep, c; - ep = dp+dlen-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp!=0 && mp[i].sp != 0 && ms>i) - for(ssp = mp[i].sp; - ssp < mp[i].ep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; + ep = dst + dlen-1; + for(;*src != '\0'; src++) switch(*src) { + case '\\': + switch(*++src) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *src - '0'; + if(match != nil && i < msize && match[i].ep != nil) { + c = *match[i].ep; + *match[i].ep = '\0'; + dst = strecpy(dst, ep+1, match[i].sp); + *match[i].ep = c; } - }else if(*sp == '&'){ - if(mp!=0 && mp[0].sp != 0 && ms>0) - for(ssp = mp[0].sp; - ssp < mp[0].ep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; + break; + case '\\': + if(dst < ep) + *dst++ = '\\'; + else + goto End; + break; + case '\0': + goto End; + default: + if(dst < ep) + *dst++ = *src; + else + goto End; + break; } - sp++; + break; + case '&': + if(match != nil && msize > 0 && match[0].sp != nil) { + c = *match[0].ep; + *match[0].ep = '\0'; + dst = strecpy(dst, ep+1, match[0].sp); + *match[0].ep = c; + } + break; + default: + if(dst < ep) + *dst++ = *src; + else + goto End; + break; } - *dp = '\0'; +End: + *dst = '\0'; } diff --git a/sys/src/libregexp/rregexec.c b/sys/src/libregexp/rregexec.c index 301a3589b..557580b9c 100644 --- a/sys/src/libregexp/rregexec.c +++ b/sys/src/libregexp/rregexec.c @@ -1,212 +1,200 @@ #include #include -#include "regexp.h" -#include "regcomp.h" +#include +#include "regimpl.h" -/* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space - */ -static int -rregexec1(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j) +typedef struct RethreadQ RethreadQ; +struct RethreadQ { - int flag=0; - Reinst *inst; - Relist *tlp; - Rune *s; - int i, checkstart; - Rune r, *rp, *ep; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - Rune *p; + Rethread *head; + Rethread **tail; +}; - match = 0; - checkstart = j->startchar; - if(mp) - for(i=0; irelist[0][0].inst = 0; - j->relist[1][0].inst = 0; +int +rregexec(Reprog *prog, Rune *str, Resub *sem, int msize) +{ + RethreadQ lists[2], *clist, *nlist, *tmp; + Rethread *t, *next, *pooltop, *avail; + Reinst *curinst; + Rune *rsp, *rep, endr, last; + int match, first, gen, pri, matchpri; - /* Execute machine once for each character, including terminal NUL */ - s = j->rstarts; - do{ - /* fast check for first char */ - if(checkstart) { - switch(j->starttype) { - case RUNE: - p = runestrchr(s, j->startchar); - if(p == 0 || s == j->reol) - return match; - s = p; - break; - case BOL: - if(s == bol) - break; - p = runestrchr(s, '\n'); - if(p == 0 || s == j->reol) - return match; - s = p+1; - break; - } - } + if(msize > NSUBEXPM) + msize = NSUBEXPM; - r = *s; + if(prog->startinst->gen != 0) { + for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++) + curinst->gen = 0; + } - /* switch run lists */ - tl = j->relist[flag]; - tle = j->reliste[flag]; - nl = j->relist[flag^=1]; - nle = j->reliste[flag]; - nl->inst = 0; + clist = lists; + clist->head = nil; + clist->tail = &clist->head; + nlist = lists + 1; + nlist->head = nil; + nlist->tail = &nlist->head; - /* Add first instruction to current list */ - _rrenewemptythread(tl, progp->startinst, ms, s); + pooltop = prog->threads + prog->nthr; + avail = nil; - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ - for(inst=tlp->inst; ; inst = inst->next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r == r) - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->subid].rsp = s; - continue; - case RBRA: - tlp->se.m[inst->subid].rep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(s == j->reol || r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->cp->end; - for(rp = inst->cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->next, ms, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].rep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; + pri = matchpri = gen = match = 0; + rsp = str; + rep = nil; + endr = L'\0'; + if(sem != nil && msize > 0) { + if(sem->rsp != nil) + rsp = sem->rsp; + if(sem->rep != nil && *sem->rep != L'\0') { + rep = sem->rep; + endr = *sem->rep; + *sem->rep = '\0'; + } + } + last = 1; + for(; last != L'\0'; rsp++) { + gen++; + last = *rsp; + first = 1; + t = clist->head; + if(t == nil) + goto Start; + curinst = t->pc; +Again: + if(curinst->gen == gen) + goto Done; + curinst->gen = gen; + switch(curinst->op) { + case ORUNE: + if(*rsp != curinst->r) + goto Done; + case OANY: /* fallthrough */ + Any: + next = t->next; + t->pc = curinst + 1; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(next == nil) + break; + t = next; + curinst = t->pc; + goto Again; + case OCLASS: + Class: + if(*rsp < curinst->r) + goto Done; + if(*rsp > curinst->r1) { + curinst++; + goto Class; + } + next = t->next; + t->pc = curinst->a; + t->next = nil; + *nlist->tail = t; + nlist->tail = &t->next; + if(next == nil) + break; + t = next; + curinst = t->pc; + goto Again; + case ONOTNL: + if(*rsp != L'\n') { + curinst++; + goto Again; + } + goto Done; + case OBOL: + if(rsp == str || rsp[-1] == L'\n') { + curinst++; + goto Again; + } + goto Done; + case OEOL: + if(*rsp == L'\0' && rep == nil) { + curinst++; + goto Again; + } + if(*rsp == '\n') + goto Any; + goto Done; + case OJMP: + curinst = curinst->a; + goto Again; + case OSPLIT: + if(avail == nil) + next = --pooltop; + else { + next = avail; + avail = avail->next; + } + next->pc = curinst->b; + if(msize > 0) + memcpy(next->sem, t->sem, sizeof(Resub)*msize); + next->pri = t->pri; + next->next = t->next; + t->next = next; + curinst = curinst->a; + goto Again; + case OSAVE: + if(curinst->sub < msize) + t->sem[curinst->sub].rsp = rsp; + curinst++; + goto Again; + case OUNSAVE: + if(curinst->sub == 0) { + /* "Highest" priority is the left-most longest. */ + if (t->pri > matchpri) + goto Done; + match = 1; + matchpri = t->pri; + if(sem != nil && msize > 0) { + memcpy(sem, t->sem, sizeof(Resub)*msize); + sem->rep = rsp; } + goto Done; + } + if(curinst->sub < msize) + t->sem[curinst->sub].rep = rsp; + curinst++; + goto Again; + Done: + next = t->next; + t->next = avail; + avail = t; + if(next == nil) break; + t = next; + curinst = t->pc; + goto Again; + } +Start: + /* Start again once if we haven't found anything. */ + if(first == 1 && match == 0) { + first = 0; + if(avail == nil) + t = --pooltop; + else { + t = avail; + avail = avail->next; } + if(msize > 0) + memset(t->sem, 0, sizeof(Resub)*msize); + /* "Lower" priority thread */ + t->pri = matchpri = pri++; + t->next = nil; + curinst = prog->startinst; + goto Again; } - if(s == j->reol) + /* If we have a match and no extant threads, we are done. */ + if(match == 1 && nlist->head == nil) break; - checkstart = j->startchar && nl->inst==0; - s++; - }while(r); - return match; -} - -static int -rregexec2(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j -) -{ - Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; - - /* mark space */ - j->relist[0] = relist0; - j->relist[1] = relist1; - j->reliste[0] = relist0 + nelem(relist0) - 2; - j->reliste[1] = relist1 + nelem(relist1) - 2; - - return rregexec1(progp, bol, mp, ms, j); -} - -extern int -rregexec(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ -{ - Reljunk j; - Relist relist0[LISTSIZE], relist1[LISTSIZE]; - int rv; - - /* - * use user-specified starting/ending location if specified - */ - j.rstarts = bol; - j.reol = 0; - if(mp && ms>0){ - if(mp->sp) - j.rstarts = mp->rsp; - if(mp->ep) - j.reol = mp->rep; + tmp = clist; + clist = nlist; + nlist = tmp; + nlist->head = nil; + nlist->tail = &nlist->head; } - j.starttype = 0; - j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) { - j.starttype = RUNE; - j.startchar = progp->startinst->r; - } - if(progp->startinst->type == BOL) - j.starttype = BOL; - - /* mark space */ - j.relist[0] = relist0; - j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 2; - j.reliste[1] = relist1 + nelem(relist1) - 2; - - rv = rregexec1(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - rv = rregexec2(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - return -1; + if(rep != nil) + *rep = endr; + return match; } diff --git a/sys/src/libregexp/rregsub.c b/sys/src/libregexp/rregsub.c index c395583d0..88cbbfc89 100644 --- a/sys/src/libregexp/rregsub.c +++ b/sys/src/libregexp/rregsub.c @@ -1,64 +1,66 @@ #include #include -#include "regexp.h" +#include -/* substitute into one string using the matches from the last regexec() */ -extern void -rregsub(Rune *sp, /* source string */ - Rune *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ +void +rregsub(Rune *src, Rune *dst, int dlen, Resub *match, int msize) { - Rune *ssp, *ep; int i; + Rune *ep, r; - ep = dp+(dlen/sizeof(Rune))-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp[i].rsp != 0 && mp!=0 && ms>i) - for(ssp = mp[i].rsp; - ssp < mp[i].rep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; + ep = dst + dlen-1; + for(;*src != L'\0'; src++) switch(*src) { + case L'\\': + switch(*++src) { + case L'0': + case L'1': + case L'2': + case L'3': + case L'4': + case L'5': + case L'6': + case L'7': + case L'8': + case L'9': + i = *src - L'0'; + if(match != nil && i < msize && match[i].rsp != nil) { + r = *match[i].rep; + *match[i].rep = L'\0'; + dst = runestrecpy(dst, ep+1, match[i].rsp); + *match[i].rep = r; } - }else if(*sp == '&'){ - if(mp[0].rsp != 0 && mp!=0 && ms>0) - if(mp[0].rsp != 0) - for(ssp = mp[0].rsp; - ssp < mp[0].rep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; + break; + case L'\\': + if(dst < ep) + *dst++ = L'\\'; + else + goto End; + break; + case L'\0': + goto End; + default: + if(dst < ep) + *dst++ = *src; + else + goto End; + break; } - sp++; + break; + case L'&': + if(match != nil && msize > 0 && match[0].rsp != nil) { + r = *match[0].rep; + *match[0].rep = L'\0'; + dst = runestrecpy(dst, ep+1, match[0].rsp); + *match[0].rep = r; + } + break; + default: + if(dst < ep) + *dst++ = *src; + else + goto End; + break; } - *dp = '\0'; +End: + *dst = L'\0'; }