]> git.lizzy.rs Git - plan9front.git/commitdiff
merge
authorcinap_lenrek <cinap_lenrek@felloff.net>
Thu, 28 Apr 2016 18:53:53 +0000 (20:53 +0200)
committercinap_lenrek <cinap_lenrek@felloff.net>
Thu, 28 Apr 2016 18:53:53 +0000 (20:53 +0200)
45 files changed:
sys/doc/prfile [changed mode: 0644->0755]
sys/include/regexp.h
sys/src/ape/lib/mkfile
sys/src/ape/lib/regexp/mkfile [deleted file]
sys/src/ape/lib/regexp/regaux.c [deleted file]
sys/src/ape/lib/regexp/regcomp.c [deleted file]
sys/src/ape/lib/regexp/regcomp.h [deleted file]
sys/src/ape/lib/regexp/regerror.c [deleted file]
sys/src/ape/lib/regexp/regexec.c [deleted file]
sys/src/ape/lib/regexp/regsub.c [deleted file]
sys/src/ape/lib/regexp/rregexec.c [deleted file]
sys/src/ape/lib/regexp/rregsub.c [deleted file]
sys/src/cmd/awk/awk.h
sys/src/cmd/awk/awkgram.y
sys/src/cmd/awk/lex.c
sys/src/cmd/awk/lib.c
sys/src/cmd/awk/main.c
sys/src/cmd/awk/maketab.c
sys/src/cmd/awk/mkfile
sys/src/cmd/awk/parse.c
sys/src/cmd/awk/popen.c [new file with mode: 0644]
sys/src/cmd/awk/proto.h
sys/src/cmd/awk/re.c
sys/src/cmd/awk/run.c
sys/src/cmd/awk/tran.c
sys/src/cmd/upas/bayes/addhash.c
sys/src/cmd/upas/bayes/bayes.c
sys/src/cmd/upas/bayes/dfa.c
sys/src/cmd/upas/bayes/dump.c
sys/src/cmd/upas/bayes/msgtok.c
sys/src/cmd/upas/bayes/regcomp.c
sys/src/cmd/upas/bayes/regcomp.h [new file with mode: 0644]
sys/src/cmd/upas/bayes/regen.c
sys/src/cmd/upas/bayes/regexp.h [new file with mode: 0644]
sys/src/libregexp/mkfile
sys/src/libregexp/regaux.c [deleted file]
sys/src/libregexp/regcomp.c
sys/src/libregexp/regcomp.h [deleted file]
sys/src/libregexp/regerror.c
sys/src/libregexp/regexec.c
sys/src/libregexp/regimpl.h [new file with mode: 0644]
sys/src/libregexp/regprint.c [new file with mode: 0644]
sys/src/libregexp/regsub.c
sys/src/libregexp/rregexec.c
sys/src/libregexp/rregsub.c

old mode 100644 (file)
new mode 100755 (executable)
index a574674acfa875d556c49ede355af8635ae9f509..77094280190f7fc8693a7427d0b8c0af8d2be9bd 100644 (file)
@@ -1,15 +1,29 @@
-#pragma        src     "/sys/src/libregexp"
-#pragma        lib     "libregexp.a"
+#pragma src "/sys/src/libregexp"
+#pragma lib "libregexp.a"
+enum
+{
+       OANY = 0,
+       OBOL,
+       OCLASS,
+       OEOL,
+       OJMP,
+       ONOTNL,
+       ORUNE,
+       OSAVE,
+       OSPLIT,
+       OUNSAVE,
+};
+
+typedef struct Resub Resub;
+typedef struct Reinst Reinst;
+typedef struct Reprog Reprog;
+typedef struct Rethread Rethread;
 
-typedef struct Resub           Resub;
-typedef struct Reclass         Reclass;
-typedef struct Reinst          Reinst;
-typedef struct Reprog          Reprog;
+#pragma incomplete Reinst
+#pragma incomplete Rethread
 
-/*
- *     Sub expression matches
- */
-struct Resub{
+struct Resub
+{
        union
        {
                char *sp;
@@ -21,46 +35,21 @@ struct Resub{
                Rune *rep;
        };
 };
-
-/*
- *     character class, each pair of rune's defines a range
- */
-struct Reclass{
-       Rune    *end;
-       Rune    spans[64];
-};
-
-/*
- *     Machine instructions
- */
-struct Reinst{
-       int     type;
-       union   {
-               Reclass *cp;            /* class pointer */
-               Rune    r;              /* character */
-               int     subid;          /* sub-expression id for RBRA and LBRA */
-               Reinst  *right;         /* right child of OR */
-       };
-       union { /* regexp relies on these two being in the same union */
-               Reinst *left;           /* left child of OR */
-               Reinst *next;           /* next instruction for CAT & LBRA */
-       };
-};
-
-/*
- *     Reprogram definition
- */
-struct Reprog{
-       Reinst  *startinst;     /* start pc */
-       Reclass class[16];      /* .data */
-       Reinst  firstinst[5];   /* .text */
+struct Reprog
+{
+       Reinst *startinst;
+       Rethread *threads;
+       char *regstr;
+       int len;
+       int nthr;
 };
 
-extern Reprog  *regcomp(char*);
-extern Reprog  *regcomplit(char*);
-extern Reprog  *regcompnl(char*);
-extern void    regerror(char*);
-extern int     regexec(Reprog*, char*, Resub*, int);
-extern void    regsub(char*, char*, int, Resub*, int);
-extern int     rregexec(Reprog*, Rune*, Resub*, int);
-extern void    rregsub(Rune*, Rune*, int, Resub*, int);
+Reprog*        regcomp(char*);
+Reprog*        regcomplit(char*);
+Reprog*        regcompnl(char*);
+void   regerror(char*);
+int    regexec(Reprog*, char*, Resub*, int);
+void   regsub(char*, char*, int, Resub*, int);
+int    rregexec(Reprog*, Rune*, Resub*, int);
+void   rregsub(Rune*, Rune*, int, Resub*, int);
+int    reprogfmt(Fmt *);
index 3762294d313fceb5951c45599ebf41abeedb325c..6c24c3674caf07a4afcd79cfd9c42de3bd85929c 100644 (file)
@@ -1,6 +1,6 @@
 </$objtype/mkfile
 
-DIRS=9 ap auth bio bsd bz2 draw fmt l mp net regexp sec utf v z
+DIRS=9 ap auth bio bsd bz2 draw fmt l mp net sec utf v z
 
 none:V:
        echo mk all, install, installall, clean, or nuke
diff --git a/sys/src/ape/lib/regexp/mkfile b/sys/src/ape/lib/regexp/mkfile
deleted file mode 100644 (file)
index e5d8bc2..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-APE=/sys/src/ape
-<$APE/config
-
-LIB=/$objtype/lib/ape/libregexp.a
-OFILES=regcomp.$O\
-       regerror.$O\
-       regexec.$O\
-       regsub.$O\
-       regaux.$O\
-       rregexec.$O\
-       rregsub.$O\
-
-</sys/src/cmd/mksyslib
-
-CFLAGS=-c -DUTF -D_REGEXP_EXTENSION
diff --git a/sys/src/ape/lib/regexp/regaux.c b/sys/src/ape/lib/regexp/regaux.c
deleted file mode 100644 (file)
index 6643b8f..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-/*
- *     Machine state
- */
-Relist*        _relist[2];
-Relist*        _reliste[2];
-int    _relistsize = LISTINCREMENT;
-
-/*
- *  save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
-       int i;
-
-       if(mp==0 || ms<=0)
-               return;
-       if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
-          (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
-               for(i=0; i<ms && i<NSUBEXP; i++)
-                       mp[i] = sp->m[i];
-               for(; i<ms; i++)
-                       mp[i].s.sp = mp[i].e.ep = 0;
-       }
-}
-
-/*
- * Note optimization in _renewthread:
- *     *lp must be pending when _renewthread called; if *l has been looked
- *             at already, the optimization is a bug.
- */
-extern Relist*
-_renewthread(Relist *lp,       /* _relist to add to */
-       Reinst *ip,     /* instruction to add */
-       Resublist *sep) /* pointers to subexpressions */
-{
-       Relist *p;
-
-       for(p=lp; p->inst; p++){
-               if(p->inst == ip){
-                       if((sep)->m[0].s.sp < p->se.m[0].s.sp)
-                               p->se = *sep;
-                       return 0;
-               }
-       }
-       p->inst = ip;
-       p->se = *sep;
-       (++p)->inst = 0;
-       return p;
-}
-
diff --git a/sys/src/ape/lib/regexp/regcomp.c b/sys/src/ape/lib/regexp/regcomp.c
deleted file mode 100644 (file)
index 5d31fc2..0000000
+++ /dev/null
@@ -1,560 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <setjmp.h>
-#include <string.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-#define        TRUE    1
-#define        FALSE   0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
-       Reinst* first;
-       Reinst* last;
-}Node;
-
-#define        NSTACK  20
-static Node    andstack[NSTACK];
-static Node    *andp;
-static int     atorstack[NSTACK];
-static int*    atorp;
-static int     cursubid;               /* id of current subexpression */
-static int     subidstack[NSTACK];     /* parallel to atorstack */
-static int*    subidp;
-static int     lastwasand;     /* Last token was operand */
-static int     nbra;
-static char*   exprp;          /* pointer to next character in source expression */
-static int     lexdone;
-static int     nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int     errors;
-static wchar_t yyrune;         /* last lex'd rune */
-static Reclass*yyclassp;       /* last lex'd class */
-
-/* predeclared crap */
-static void    operator(int);
-static void    pushand(Reinst*, Reinst*);
-static void    pushator(int);
-static void    evaluntil(int);
-static int     bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static void
-rcerror(char *s)
-{
-       errors++;
-       regerror(s);
-       longjmp(regkaboom, 1);
-}
-
-static Reinst*
-newinst(int t)
-{
-       freep->type = t;
-       freep->l.left = 0;
-       freep->r.right = 0;
-       return freep++;
-}
-
-static void
-operand(int t)
-{
-       Reinst *i;
-
-       if(lastwasand)
-               operator(CAT);  /* catenate is implicit */
-       i = newinst(t);
-
-       if(t == CCLASS || t == NCCLASS)
-               i->r.cp = yyclassp;
-       if(t == RUNE)
-               i->r.r = yyrune;
-
-       pushand(i, i);
-       lastwasand = TRUE;
-}
-
-static void
-operator(int t)
-{
-       if(t==RBRA && --nbra<0)
-               rcerror("unmatched right paren");
-       if(t==LBRA){
-               if(++cursubid >= NSUBEXP)
-                       rcerror ("too many subexpressions");
-               nbra++;
-               if(lastwasand)
-                       operator(CAT);
-       } else
-               evaluntil(t);
-       if(t != RBRA)
-               pushator(t);
-       lastwasand = FALSE;
-       if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
-               lastwasand = TRUE;      /* these look like operands */
-}
-
-static void
-regerr2(char *s, int c)
-{
-       char buf[100];
-       char *cp = buf;
-       while(*s)
-               *cp++ = *s++;
-       *cp++ = c;
-       *cp = '\0'; 
-       rcerror(buf);
-}
-
-static void
-cant(char *s)
-{
-       char buf[100];
-       strcpy(buf, "can't happen: ");
-       strcat(buf, s);
-       rcerror(buf);
-}
-
-static void
-pushand(Reinst *f, Reinst *l)
-{
-       if(andp >= &andstack[NSTACK])
-               cant("operand stack overflow");
-       andp->first = f;
-       andp->last = l;
-       andp++;
-}
-
-static void
-pushator(int t)
-{
-       if(atorp >= &atorstack[NSTACK])
-               cant("operator stack overflow");
-       *atorp++ = t;
-       *subidp++ = cursubid;
-}
-
-static Node*
-popand(int op)
-{
-       Reinst *inst;
-
-       if(andp <= &andstack[0]){
-               regerr2("missing operand for ", op);
-               inst = newinst(NOP);
-               pushand(inst,inst);
-       }
-       return --andp;
-}
-
-static int
-popator(void)
-{
-       if(atorp <= &atorstack[0])
-               cant("operator stack underflow");
-       --subidp;
-       return *--atorp;
-}
-
-static void
-evaluntil(int pri)
-{
-       Node *op1, *op2;
-       Reinst *inst1, *inst2;
-
-       while(pri==RBRA || atorp[-1]>=pri){
-               switch(popator()){
-               default:
-                       rcerror("unknown operator in evaluntil");
-                       break;
-               case LBRA:              /* must have been RBRA */
-                       op1 = popand('(');
-                       inst2 = newinst(RBRA);
-                       inst2->r.subid = *subidp;
-                       op1->last->l.next = inst2;
-                       inst1 = newinst(LBRA);
-                       inst1->r.subid = *subidp;
-                       inst1->l.next = op1->first;
-                       pushand(inst1, inst2);
-                       return;
-               case OR:
-                       op2 = popand('|');
-                       op1 = popand('|');
-                       inst2 = newinst(NOP);
-                       op2->last->l.next = inst2;
-                       op1->last->l.next = inst2;
-                       inst1 = newinst(OR);
-                       inst1->r.right = op1->first;
-                       inst1->l.left = op2->first;
-                       pushand(inst1, inst2);
-                       break;
-               case CAT:
-                       op2 = popand(0);
-                       op1 = popand(0);
-                       op1->last->l.next = op2->first;
-                       pushand(op1->first, op2->last);
-                       break;
-               case STAR:
-                       op2 = popand('*');
-                       inst1 = newinst(OR);
-                       op2->last->l.next = inst1;
-                       inst1->r.right = op2->first;
-                       pushand(inst1, inst1);
-                       break;
-               case PLUS:
-                       op2 = popand('+');
-                       inst1 = newinst(OR);
-                       op2->last->l.next = inst1;
-                       inst1->r.right = op2->first;
-                       pushand(op2->first, inst1);
-                       break;
-               case QUEST:
-                       op2 = popand('?');
-                       inst1 = newinst(OR);
-                       inst2 = newinst(NOP);
-                       inst1->l.left = inst2;
-                       inst1->r.right = op2->first;
-                       op2->last->l.next = inst2;
-                       pushand(inst1, inst2);
-                       break;
-               }
-       }
-}
-
-static Reprog*
-optimize(Reprog *pp)
-{
-       Reinst *inst, *target;
-       int size;
-       Reprog *npp;
-       int diff;
-
-       /*
-        *  get rid of NOOP chains
-        */
-       for(inst=pp->firstinst; inst->type!=END; inst++){
-               target = inst->l.next;
-               while(target->type == NOP)
-                       target = target->l.next;
-               inst->l.next = target;
-       }
-
-       /*
-        *  The original allocation is for an area larger than
-        *  necessary.  Reallocate to the actual space used
-        *  and then relocate the code.
-        */
-       size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
-       npp = realloc(pp, size);
-       if(npp==0 || npp==pp)
-               return pp;
-       diff = (char *)npp - (char *)pp;
-       freep = (Reinst *)((char *)freep + diff);
-       for(inst=npp->firstinst; inst<freep; inst++){
-               switch(inst->type){
-               case OR:
-               case STAR:
-               case PLUS:
-               case QUEST:
-               case CCLASS:
-               case NCCLASS:
-                       *(char **)&inst->r.right += diff;
-                       break;
-               }
-               *(char **)&inst->l.left += diff;
-       }
-       *(char **)&npp->startinst += diff;
-       return npp;
-}
-
-#ifdef DEBUG
-static void
-dumpstack(void){
-       Node *stk;
-       int *ip;
-
-       print("operators\n");
-       for(ip=atorstack; ip<atorp; ip++)
-               print("0%o\n", *ip);
-       print("operands\n");
-       for(stk=andstack; stk<andp; stk++)
-               print("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-
-static void
-dump(Reprog *pp)
-{
-       Reinst *l;
-       wchar_t *p;
-
-       l = pp->firstinst;
-       do{
-               print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
-                       l->l.left-pp->firstinst, l->l.right-pp->firstinst);
-               if(l->type == RUNE)
-                       print("\t%C\n", l->r);
-               else if(l->type == CCLASS || l->type == NCCLASS){
-                       print("\t[");
-                       if(l->type == NCCLASS)
-                               print("^");
-                       for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
-                               if(p[0] == p[1])
-                                       print("%C", p[0]);
-                               else
-                                       print("%C-%C", p[0], p[1]);
-                       print("]\n");
-               } else
-                       print("\n");
-       }while(l++->type);
-}
-#endif
-
-static Reclass*
-newclass(void)
-{
-       if(nclass >= NCLASS)
-               regerr2("too many character classes; limit", NCLASS+'0');
-       return &(classp[nclass++]);
-}
-
-static int
-nextc(wchar_t *rp)
-{
-       int n;
-
-       if(lexdone){
-               *rp = 0;
-               return 1;
-       }
-       n = mbtowc(rp, exprp, MB_CUR_MAX);
-       if (n <= 0)
-               n = 1;
-       exprp += n;
-       if(*rp == L'\\'){
-               n = mbtowc(rp, exprp, MB_CUR_MAX);
-               if (n <= 0)
-                       n = 1;
-               exprp += n;
-               return 1;
-       }
-       if(*rp == 0)
-               lexdone = 1;
-       return 0;
-}
-
-static int
-lex(int literal, int dot_type)
-{
-       int quoted;
-
-       quoted = nextc(&yyrune);
-       if(literal || quoted){
-               if(yyrune == 0)
-                       return END;
-               return RUNE;
-       }
-
-       switch(yyrune){
-       case 0:
-               return END;
-       case L'*':
-               return STAR;
-       case L'?':
-               return QUEST;
-       case L'+':
-               return PLUS;
-       case L'|':
-               return OR;
-       case L'.':
-               return dot_type;
-       case L'(':
-               return LBRA;
-       case L')':
-               return RBRA;
-       case L'^':
-               return BOL;
-       case L'$':
-               return EOL;
-       case L'[':
-               return bldcclass();
-       }
-       return RUNE;
-}
-
-static int
-bldcclass(void)
-{
-       int type;
-       wchar_t r[NCCRUNE];
-       wchar_t *p, *ep, *np;
-       wchar_t rune;
-       int quoted;
-
-       /* we have already seen the '[' */
-       type = CCLASS;
-       yyclassp = newclass();
-
-       /* look ahead for negation */
-       ep = r;
-       quoted = nextc(&rune);
-       if(!quoted && rune == L'^'){
-               type = NCCLASS;
-               quoted = nextc(&rune);
-               *ep++ = L'\n';
-               *ep++ = L'\n';
-       }
-
-       /* parse class into a set of spans */
-       for(; ep<&r[NCCRUNE];){
-               if(rune == 0){
-                       rcerror("malformed '[]'");
-                       return 0;
-               }
-               if(!quoted && rune == L']')
-                       break;
-               if(!quoted && rune == L'-'){
-                       if(ep == r){
-                               rcerror("malformed '[]'");
-                               return 0;
-                       }
-                       quoted = nextc(&rune);
-                       if((!quoted && rune == L']') || rune == 0){
-                               rcerror("malformed '[]'");
-                               return 0;
-                       }
-                       *(ep-1) = rune;
-               } else {
-                       *ep++ = rune;
-                       *ep++ = rune;
-               }
-               quoted = nextc(&rune);
-       }
-
-       /* sort on span start */
-       for(p = r; p < ep; p += 2){
-               for(np = p; np < ep; np += 2)
-                       if(*np < *p){
-                               rune = np[0];
-                               np[0] = p[0];
-                               p[0] = rune;
-                               rune = np[1];
-                               np[1] = p[1];
-                               p[1] = rune;
-                       }
-       }
-
-       /* merge spans */
-       np = yyclassp->spans;
-       p = r;
-       if(r == ep)
-               yyclassp->end = np;
-       else {
-               np[0] = *p++;
-               np[1] = *p++;
-               for(; p < ep; p += 2)
-                       if(p[0] <= np[1]){
-                               if(p[1] > np[1])
-                                       np[1] = p[1];
-                       } else {
-                               np += 2;
-                               np[0] = p[0];
-                               np[1] = p[1];
-                       }
-               yyclassp->end = np+2;
-       }
-
-       return type;
-}
-
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
-{
-       int token;
-       Reprog *pp;
-
-       /* get memory for the program */
-       pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
-       if(pp == 0){
-               regerror("out of memory");
-               return 0;
-       }
-       freep = pp->firstinst;
-       classp = pp->class;
-       errors = 0;
-
-       if(setjmp(regkaboom))
-               goto out;
-
-       /* go compile the sucker */
-       lexdone = 0;
-       exprp = s;
-       nclass = 0;
-       nbra = 0;
-       atorp = atorstack;
-       andp = andstack;
-       subidp = subidstack;
-       lastwasand = FALSE;
-       cursubid = 0;
-
-       /* Start with a low priority operator to prime parser */
-       pushator(START-1);
-       while((token = lex(literal, dot_type)) != END){
-               if((token&0300) == OPERATOR)
-                       operator(token);
-               else
-                       operand(token);
-       }
-
-       /* Close with a low priority operator */
-       evaluntil(START);
-
-       /* Force END */
-       operand(END);
-       evaluntil(START);
-#ifdef DEBUG
-       dumpstack();
-#endif
-       if(nbra)
-               rcerror("unmatched left paren");
-       --andp; /* points to first and only operand */
-       pp->startinst = andp->first;
-#ifdef DEBUG
-       dump(pp);
-#endif
-       pp = optimize(pp);
-#ifdef DEBUG
-       print("start: %d\n", andp->first-pp->firstinst);
-       dump(pp);
-#endif
-out:
-       if(errors){
-               free(pp);
-               pp = 0;
-       }
-       return pp;
-}
-
-extern Reprog*
-regcomp(char *s)
-{
-       return regcomp1(s, 0, ANY);
-}
-
-extern Reprog*
-regcomplit(char *s)
-{
-       return regcomp1(s, 1, ANY);
-}
-
-extern Reprog*
-regcompnl(char *s)
-{
-       return regcomp1(s, 0, ANYNL);
-}
diff --git a/sys/src/ape/lib/regexp/regcomp.h b/sys/src/ape/lib/regexp/regcomp.h
deleted file mode 100644 (file)
index 082e613..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  substitution list
- */
-typedef struct Resublist       Resublist;
-struct Resublist
-{
-       Resub   m[32];
-};
-
-/* max subexpressions per program */
-Resublist ReSuBlIsT;
-#define NSUBEXP (sizeof(ReSuBlIsT.m)/sizeof(Resub))
-
-/* max character classes per program */
-Reprog RePrOg;
-#define        NCLASS  (sizeof(RePrOg.class)/sizeof(Reclass))
-
-/* max rune ranges per character class */
-#define NCCRUNE        (sizeof(Reclass)/sizeof(wchar_t))
-
-/*
- * Actions and Tokens (Reinst types)
- *
- *     02xx are operators, value == precedence
- *     03xx are tokens, i.e. operands for operators
- */
-#define RUNE           0177
-#define        OPERATOR        0200    /* Bitmask of all operators */
-#define        START           0200    /* Start, used for marker on stack */
-#define        RBRA            0201    /* Right bracket, ) */
-#define        LBRA            0202    /* Left bracket, ( */
-#define        OR              0203    /* Alternation, | */
-#define        CAT             0204    /* Concatentation, implicit operator */
-#define        STAR            0205    /* Closure, * */
-#define        PLUS            0206    /* a+ == aa* */
-#define        QUEST           0207    /* a? == a|nothing, i.e. 0 or 1 a's */
-#define        ANY             0300    /* Any character except newline, . */
-#define        ANYNL           0301    /* Any character including newline, . */
-#define        NOP             0302    /* No operation, internal use only */
-#define        BOL             0303    /* Beginning of line, ^ */
-#define        EOL             0304    /* End of line, $ */
-#define        CCLASS          0305    /* Character class, [] */
-#define        NCCLASS         0306    /* Negated character class, [] */
-#define        END             0377    /* Terminate: match found */
-
-/*
- *  regexec execution lists
- */
-#define LISTINCREMENT 8
-typedef struct Relist  Relist;
-struct Relist
-{
-       Reinst          *inst;          /* Reinstruction of the thread */
-       Resublist       se;             /* matched subexpressions in this thread */
-};
-extern Relist* _relist[2];
-extern Relist* _reliste[2];
-extern int     _relistsize;
-
-extern Relist* _renewthread(Relist*, Reinst*, Resublist*);
-extern void    _renewmatch(Resub*, int, Resublist*);
diff --git a/sys/src/ape/lib/regexp/regerror.c b/sys/src/ape/lib/regexp/regerror.c
deleted file mode 100644 (file)
index bfa1582..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "regexp.h"
-
-void
-regerror(char *s)
-{
-       char buf[132];
-
-       strcpy(buf, "regerror: ");
-       strcat(buf, s);
-       strcat(buf, "\n");
-       fwrite(buf, 1, strlen(buf), stderr);
-       exit(1);
-}
diff --git a/sys/src/ape/lib/regexp/regexec.c b/sys/src/ape/lib/regexp/regexec.c
deleted file mode 100644 (file)
index dee888b..0000000
+++ /dev/null
@@ -1,191 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty;               /* empty set of matches */
-
-/*
- *  return     0 if no match
- *             >0 if a match
- *             <0 if we ran out of _relist space
- */
-static int
-regexec1(Reprog *progp,        /* program to run */
-       char *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms,         /* number of elements at mp */
-       char *starts,
-       char *eol,
-       wchar_t startchar)
-{
-       int flag=0;
-       Reinst *inst;
-       Relist *tlp;
-       char *s;
-       int i, checkstart;
-       wchar_t r, *rp, *ep;
-       int n;
-       Relist* tl;             /* This list, next list */
-       Relist* nl;
-       Relist* tle;            /* ends of this and next list */
-       Relist* nle;
-       int match;
-
-       match = 0;
-       checkstart = startchar;
-       sempty.m[0].s.sp = 0;
-       if(mp!=0)
-               for(i=0; i<ms; i++)
-                       mp[i].s.sp = mp[i].e.ep = 0;
-       _relist[0][0].inst = _relist[1][0].inst = 0;
-
-       /* Execute machine once for each character, including terminal NUL */
-       s = starts;
-       do{
-               /* fast check for first char */
-               r = *(unsigned char*)s;
-               if(checkstart && r != startchar){
-                       s++;
-                       continue;
-               }
-
-               if(r < Runeself)
-                       n = 1;
-               else {
-                       n = mbtowc(&r, s, MB_CUR_MAX);
-                       if (n <= 0)
-                               n = 1;
-               }
-
-               /* switch run lists */
-               tl = _relist[flag];
-               tle = _reliste[flag];
-               nl = _relist[flag^=1];
-               nle = _reliste[flag];
-               nl->inst = 0;
-
-               /* Add first instruction to current list */
-               if(match == 0){
-                       sempty.m[0].s.sp = s;
-                       _renewthread(tl, progp->startinst, &sempty);
-               }
-
-               /* Execute machine until current list is empty */
-               for(tlp=tl; tlp->inst; tlp++){  /* assignment = */
-                       if(s == eol)
-                               break;
-
-                       for(inst = tlp->inst; ; inst = inst->l.next){
-                               switch(inst->type){
-                               case RUNE:      /* regular character */
-                                       if(inst->r.r == r)
-                                               if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case LBRA:
-                                       tlp->se.m[inst->r.subid].s.sp = s;
-                                       continue;
-                               case RBRA:
-                                       tlp->se.m[inst->r.subid].e.ep = s;
-                                       continue;
-                               case ANY:
-                                       if(r != '\n')
-                                               if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case ANYNL:
-                                       if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                               return -1;
-                                       break;
-                               case BOL:
-                                       if(s == bol || *(s-1) == '\n')
-                                               continue;
-                                       break;
-                               case EOL:
-                                       if(r == 0 || r == '\n')
-                                               continue;
-                                       break;
-                               case CCLASS:
-                                       ep = inst->r.cp->end;
-                                       for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1]){
-                                                       if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                               return -1;
-                                                       break;
-                                               }
-                                       break;
-                               case NCCLASS:
-                                       ep = inst->r.cp->end;
-                                       for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1])
-                                                       break;
-                                       if(rp == ep)
-                                               if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case OR:
-                                       /* evaluate right choice later */
-                                       if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
-                                               return -1;
-                                       /* efficiency: advance and re-evaluate */
-                                       continue;
-                               case END:       /* Match! */
-                                       match = 1;
-                                       tlp->se.m[0].e.ep = s;
-                                       if(mp != 0)
-                                               _renewmatch(mp, ms, &tlp->se);
-                                       break;
-                               }
-                               break;
-                       }
-               }
-               checkstart = startchar && nl->inst==0;
-               s += n;
-       }while(r);
-       return match;
-}
-
-extern int
-regexec(Reprog *progp, /* program to run */
-       char *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements at mp */
-{
-       char *starts;   /* where to start match */
-       char *eol;      /* where to end match */
-       wchar_t startchar;
-       int rv;
-
-       /*
-        *  use user-specified starting/ending location if specified
-        */
-       starts = bol;
-       eol = 0;
-       if(mp && ms>0){
-               if(mp->s.sp)
-                       starts = mp->s.sp;
-               if(mp->e.ep)
-                       eol = mp->e.ep;
-       }
-       startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
-               ? progp->startinst->r.r : 0;
-
-       /* keep trying till we have enough list space to terminate */
-       for(;;){
-               if(_relist[0] == 0){
-                       _relist[0] = malloc(2*_relistsize*sizeof(Relist));
-                       _relist[1] = _relist[0] + _relistsize;
-                       _reliste[0] = _relist[0] + _relistsize - 1;
-                       _reliste[1] = _relist[1] + _relistsize - 1;
-                       if(_relist[0] == 0)
-                               regerror("_relist overflow");
-               }
-               rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
-               if(rv >= 0)
-                       return rv;
-               free(_relist[0]);
-               _relist[0] = 0;
-               _relistsize += LISTINCREMENT;
-       }
-}
diff --git a/sys/src/ape/lib/regexp/regsub.c b/sys/src/ape/lib/regexp/regsub.c
deleted file mode 100644 (file)
index 0c891e9..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern void
-regsub(char *sp,       /* source string */
-       char *dp,       /* destination string */
-       int dlen,
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements pointed to by mp */
-{
-       char *ssp, *ep;
-       int i;
-
-       ep = dp+dlen-1;
-       while(*sp != '\0'){
-               if(*sp == '\\'){
-                       switch(*++sp){
-                       case '0':
-                       case '1':
-                       case '2':
-                       case '3':
-                       case '4':
-                       case '5':
-                       case '6':
-                       case '7':
-                       case '8':
-                       case '9':
-                               i = *sp-'0';
-                               if(mp[i].s.sp != 0 && mp!=0 && ms>i)
-                                       for(ssp = mp[i].s.sp;
-                                            ssp < mp[i].e.ep;
-                                            ssp++)
-                                               if(dp < ep)
-                                                       *dp++ = *ssp;
-                               break;
-                       case '\\':
-                               if(dp < ep)
-                                       *dp++ = '\\';
-                               break;
-                       case '\0':
-                               sp--;
-                               break;
-                       default:
-                               if(dp < ep)
-                                       *dp++ = *sp;
-                               break;
-                       }
-               }else if(*sp == '&'){                           
-                       if(mp[0].s.sp != 0 && mp!=0 && ms>0)
-                       if(mp[0].s.sp != 0)
-                               for(ssp = mp[0].s.sp;
-                                    ssp < mp[0].e.ep; ssp++)
-                                       if(dp < ep)
-                                               *dp++ = *ssp;
-               }else{
-                       if(dp < ep)
-                               *dp++ = *sp;
-               }
-               sp++;
-       }
-       *dp = '\0';
-}
diff --git a/sys/src/ape/lib/regexp/rregexec.c b/sys/src/ape/lib/regexp/rregexec.c
deleted file mode 100644 (file)
index 519ac25..0000000
+++ /dev/null
@@ -1,181 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-static Resublist sempty;               /* empty set of matches */
-
-/*
- *  return     0 if no match
- *             >0 if a match
- *             <0 if we ran out of _relist space
- */
-static int
-rregexec1(Reprog *progp,       /* program to run */
-       wchar_t *bol,           /* string to run machine on */
-       Resub *mp,              /* subexpression elements */
-       int ms,                 /* number of elements at mp */
-       wchar_t *starts,
-       wchar_t *eol,
-       wchar_t startchar)
-{
-       int flag=0;
-       Reinst *inst;
-       Relist *tlp;
-       wchar_t *s;
-       int i, checkstart;
-       wchar_t r, *rp, *ep;
-       int n;
-       Relist* tl;             /* This list, next list */
-       Relist* nl;
-       Relist* tle;            /* ends of this and next list */
-       Relist* nle;
-       int match;
-
-       match = 0;
-       checkstart = startchar;
-       sempty.m[0].s.rsp = 0;
-       if(mp!=0)
-               for(i=0; i<ms; i++)
-                       mp[i].s.rsp = mp[i].e.rep = 0;
-       _relist[0][0].inst = _relist[1][0].inst = 0;
-
-       /* Execute machine once for each character, including terminal NUL */
-       s = starts;
-       do{
-               r = *s;
-
-               /* fast check for first char */
-               if(checkstart && r!=startchar){
-                       s++;
-                       continue;
-               }
-
-               /* switch run lists */
-               tl = _relist[flag];
-               tle = _reliste[flag];
-               nl = _relist[flag^=1];
-               nle = _reliste[flag];
-               nl->inst = 0;
-
-               /* Add first instruction to current list */
-               sempty.m[0].s.rsp = s;
-               _renewthread(tl, progp->startinst, &sempty);
-
-               /* Execute machine until current list is empty */
-               for(tlp=tl; tlp->inst; tlp++){  /* assignment = */
-                       if(s == eol)
-                               break;
-
-                       for(inst=tlp->inst; ; inst = inst->l.next){
-                               switch(inst->type){
-                               case RUNE:      /* regular character */
-                                       if(inst->r.r == r)
-                                               if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case LBRA:
-                                       tlp->se.m[inst->r.subid].s.rsp = s;
-                                       continue;
-                               case RBRA:
-                                       tlp->se.m[inst->r.subid].e.rep = s;
-                                       continue;
-                               case ANY:
-                                       if(r != '\n')
-                                               if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case ANYNL:
-                                       if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case BOL:
-                                       if(s == bol || *(s-1) == '\n')
-                                               continue;
-                                       break;
-                               case EOL:
-                                       if(r == 0 || r == '\n')
-                                               continue;
-                                       break;
-                               case CCLASS:
-                                       ep = inst->r.cp->end;
-                                       for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1]){
-                                                       if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                               return -1;
-                                                       break;
-                                               }
-                                       break;
-                               case NCCLASS:
-                                       ep = inst->r.cp->end;
-                                       for(rp = inst->r.cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1])
-                                                       break;
-                                       if(rp == ep)
-                                               if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case OR:
-                                       /* evaluate right choice later */
-                                       if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
-                                               return -1;
-                                       /* efficiency: advance and re-evaluate */
-                                       continue;
-                               case END:       /* Match! */
-                                       match = 1;
-                                       tlp->se.m[0].e.rep = s;
-                                       if(mp != 0)
-                                               _renewmatch(mp, ms, &tlp->se);
-                                       break;
-                               }
-                               break;
-                       }
-               }
-               checkstart = startchar && nl->inst==0;
-               s++;
-       }while(r);
-       return match;
-}
-
-extern int
-rregexec(Reprog *progp,        /* program to run */
-       wchar_t *bol,   /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements at mp */
-{
-       wchar_t *starts;        /* where to start match */
-       wchar_t *eol;   /* where to end match */
-       wchar_t startchar;
-       int rv;
-
-       /*
-        *  use user-specified starting/ending location if specified
-        */
-       starts = bol;
-       eol = 0;
-       if(mp && ms>0){
-               if(mp->s.rsp)
-                       starts = mp->s.rsp;
-               if(mp->e.rep)
-                       eol = mp->e.rep;
-       }
-       startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0;
-
-       /* keep trying till we have enough list space to terminate */
-       for(;;){
-               if(_relist[0] == 0){
-                       _relist[0] = malloc(2*_relistsize*sizeof(Relist));
-                       _relist[1] = _relist[0] + _relistsize;
-                       _reliste[0] = _relist[0] + _relistsize - 1;
-                       _reliste[1] = _relist[1] + _relistsize - 1;
-                       if(_relist[0] == 0)
-                               regerror("_relist overflow");
-               }
-               rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar);
-               if(rv >= 0)
-                       return rv;
-               free(_relist[0]);
-               _relist[0] = 0;
-               _relistsize += LISTINCREMENT;
-       }
-}
diff --git a/sys/src/ape/lib/regexp/rregsub.c b/sys/src/ape/lib/regexp/rregsub.c
deleted file mode 100644 (file)
index 5fc3e59..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include "regexp.h"
-
-/* substitute into one string using the matches from the last regexec() */
-extern void
-rregsub(wchar_t *sp,   /* source string */
-       wchar_t *dp,    /* destination string */
-       int dlen,
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements pointed to by mp */
-{
-       wchar_t *ssp, *ep;
-       int i;
-
-       ep = dp+(dlen/sizeof(wchar_t))-1;
-       while(*sp != '\0'){
-               if(*sp == '\\'){
-                       switch(*++sp){
-                       case '0':
-                       case '1':
-                       case '2':
-                       case '3':
-                       case '4':
-                       case '5':
-                       case '6':
-                       case '7':
-                       case '8':
-                       case '9':
-                               i = *sp-'0';
-                               if(mp[i].s.rsp != 0 && mp!=0 && ms>i)
-                                       for(ssp = mp[i].s.rsp;
-                                            ssp < mp[i].e.rep;
-                                            ssp++)
-                                               if(dp < ep)
-                                                       *dp++ = *ssp;
-                               break;
-                       case '\\':
-                               if(dp < ep)
-                                       *dp++ = '\\';
-                               break;
-                       case '\0':
-                               sp--;
-                               break;
-                       default:
-                               if(dp < ep)
-                                       *dp++ = *sp;
-                               break;
-                       }
-               }else if(*sp == '&'){                           
-                       if(mp[0].s.rsp != 0 && mp!=0 && ms>0)
-                       if(mp[0].s.rsp != 0)
-                               for(ssp = mp[0].s.rsp;
-                                    ssp < mp[0].e.rep; ssp++)
-                                       if(dp < ep)
-                                               *dp++ = *ssp;
-               }else{
-                       if(dp < ep)
-                               *dp++ = *sp;
-               }
-               sp++;
-       }
-       *dp = '\0';
-}
index 0bdcd8183eb5588f6332fe6a1a85dc8bb3d849d6..2ede5e1cfe3de31a1baa5f38cc555dac9b9017c4 100644 (file)
@@ -6,20 +6,18 @@ Copyright (c) Lucent Technologies 1997
 
 typedef double Awkfloat;
 
-/* unsigned char is more trouble than it's worth */
-
-typedef        unsigned char uschar;
-
-#define        xfree(a)        { if ((a) != NULL) { free((char *) a); a = NULL; } }
+#define        xfree(a)        { if ((a) != nil) { free((a)); (a) = nil; } }
 
 #define        DEBUG
 #ifdef DEBUG
                        /* uses have to be doubly parenthesized */
-#      define  dprintf(x)      if (dbg) printf x
+#      define  dprint(x)       if (dbg) print x
 #else
-#      define  dprintf(x)
+#      define  dprint(x)
 #endif
 
+#define        FOPEN_MAX       40      /* max number of open files */
+
 extern char    errbuf[];
 
 extern int     compile_time;   /* 1 if compiling, 0 if running */
@@ -28,6 +26,10 @@ extern int   safe;           /* 0 => unsafe, 1 => safe */
 #define        RECSIZE (8 * 1024)      /* sets limit on records, fields, etc., etc. */
 extern int     recsize;        /* size of current record, orig RECSIZE */
 
+extern Biobuf stdin;
+extern Biobuf stdout;
+extern Biobuf stderr;
+
 extern char    **FS;
 extern char    **RS;
 extern char    **ORS;
@@ -56,8 +58,8 @@ extern        int     patlen;         /* length of pattern matched.  set in b.c */
 /* Cell:  all information about a variable or constant */
 
 typedef struct Cell {
-       uschar  ctype;          /* OCELL, OBOOL, OJUMP, etc. */
-       uschar  csub;           /* CCON, CTEMP, CFLD, etc. */
+       uchar   ctype;          /* OCELL, OBOOL, OJUMP, etc. */
+       uchar   csub;           /* CCON, CTEMP, CFLD, etc. */
        char    *nval;          /* name, for variables only */
        char    *sval;          /* string value */
        Awkfloat fval;          /* value as number */
@@ -66,7 +68,7 @@ typedef struct Cell {
 } Cell;
 
 typedef struct Array {         /* symbol table array */
-       int     nelem         /* elements in table right now */
+       int     nelemt;         /* elements in table right now */
        int     size;           /* size of tab */
        Cell    **tab;          /* hash table pointers */
 } Array;
index 1216beb211ea50438e5222c685a4ed19c987e862..7ddf6087bf675074b2b9b3c0ef6b798002427221 100644 (file)
@@ -23,8 +23,9 @@ THIS SOFTWARE.
 ****************************************************************/
 
 %{
-#include <stdio.h>
-#include <string.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
 #include "awk.h"
 
 #define        makedfa(a,b)    compre(a)
index d59317e103e8b988e50dbfd5c367a08e59b7a73f..90ecf4245d6ef9edca4949eca1cfd887382c9c76 100644 (file)
@@ -22,10 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <u.h>
+#include <libc.h>
 #include <ctype.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
@@ -90,9 +90,8 @@ Keyword keywords[] ={ /* keep sorted: binary searched */
        { "while",      WHILE,          WHILE },
 };
 
-#define DEBUG
 #ifdef DEBUG
-#define        RET(x)  { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
+#define        RET(x)  { if(dbg)print("lex %s\n", tokname(x)); return(x); }
 #else
 #define        RET(x)  return(x)
 #endif
@@ -170,7 +169,7 @@ int yylex(void)
        static char *buf = 0;
        static int bufsize = 500;
 
-       if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
+       if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil)
                FATAL( "out of space in yylex" );
        if (sc) {
                sc = 0;
@@ -353,7 +352,7 @@ int string(void)
        static char *buf = 0;
        static int bufsz = 500;
 
-       if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
+       if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
                FATAL("out of space for strings");
        for (bp = buf; (c = input()) != '"'; ) {
                if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
@@ -401,7 +400,7 @@ int string(void)
                                }
                                *px = 0;
                                unput(c);
-                               sscanf(xbuf, "%x", &n);
+                               n = strtol(xbuf, nil, 16);
                                *bp++ = n;
                                break;
                            }
@@ -497,7 +496,7 @@ int regexpr(void)
        static int bufsz = 500;
        char *bp;
 
-       if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
+       if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
                FATAL("out of space for rex expr");
        bp = buf;
        for ( ; (c = input()) != '/' && c != 0; ) {
@@ -526,7 +525,7 @@ char        ebuf[300];
 char   *ep = ebuf;
 char   yysbuf[100];    /* pushback buffer */
 char   *yysptr = yysbuf;
-FILE   *yyin = 0;
+Biobuf *yyin;
 
 int input(void)        /* get next lexical input character */
 {
@@ -535,14 +534,14 @@ int input(void)   /* get next lexical input character */
 
        if (yysptr > yysbuf)
                c = *--yysptr;
-       else if (lexprog != NULL) {     /* awk '...' */
+       else if (lexprog != nil) {      /* awk '...' */
                if ((c = *lexprog) != 0)
                        lexprog++;
        } else                          /* awk -f ... */
                c = pgetc();
        if (c == '\n')
                lineno++;
-       else if (c == EOF)
+       else if (c == Beof)
                c = 0;
        if (ep >= ebuf + sizeof ebuf)
                ep = ebuf;
index 5a627369000cbe11025a5d080ee52d7e091e2f85..2d60e3abbcdffa95febc6bc9d7b25c2ad66abd03 100644 (file)
@@ -22,17 +22,14 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-#define DEBUG
-#include <stdio.h>
-#include <string.h>
+#include <u.h>
+#include <libc.h>
 #include <ctype.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <stdarg.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
-FILE   *infile = NULL;
+Biobuf *infile;
 char   *file   = "";
 char   *record;
 int    recsize = RECSIZE;
@@ -50,17 +47,17 @@ int donerec;        /* 1 = record is valid (no flds have changed) */
 
 int    lastfld = 0;    /* last used field */
 int    argno   = 1;    /* current input argument number */
-extern Awkfloat *ARGC;
+extern Awkfloat *AARGC;
 
-static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
-static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
+static Cell dollar0 = { OCELL, CFLD, nil, "", 0.0, REC|STR|DONTFREE };
+static Cell dollar1 = { OCELL, CFLD, nil, "", 0.0, FLD|STR|DONTFREE };
 
 void recinit(unsigned int n)
 {
        record = (char *) malloc(n);
        fields = (char *) malloc(n);
        fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *));
-       if (record == NULL || fields == NULL || fldtab == NULL)
+       if (record == nil || fields == nil || fldtab == nil)
                FATAL("out of space for $0 and fields");
        fldtab[0] = (Cell *) malloc(sizeof (Cell));
        *fldtab[0] = dollar0;
@@ -76,10 +73,10 @@ void makefields(int n1, int n2)             /* create $n1..$n2 inclusive */
 
        for (i = n1; i <= n2; i++) {
                fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
-               if (fldtab[i] == NULL)
+               if (fldtab[i] == nil)
                        FATAL("out of space in makefields %d", i);
                *fldtab[i] = dollar1;
-               sprintf(temp, "%d", i);
+               sprint(temp, "%d", i);
                fldtab[i]->nval = tostring(temp);
        }
 }
@@ -89,7 +86,7 @@ void initgetrec(void)
        int i;
        char *p;
 
-       for (i = 1; i < *ARGC; i++) {
+       for (i = 1; i < *AARGC; i++) {
                if (!isclvar(p = getargv(i))) { /* find 1st real filename */
                        setsval(lookup("FILENAME", symtab), getargv(i));
                        return;
@@ -97,7 +94,7 @@ void initgetrec(void)
                setclvar(p);    /* a commandline assignment before filename */
                argno++;
        }
-       infile = stdin;         /* no filenames, so use stdin */
+       infile = &stdin;                /* no filenames, so use &stdin */
 }
 
 int getrec(char **pbuf, int *pbufsize, int isrecord)   /* get next input record */
@@ -111,16 +108,16 @@ int getrec(char **pbuf, int *pbufsize, int isrecord)      /* get next input record */
                firsttime = 0;
                initgetrec();
        }
-          dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
-               *RS, *FS, *ARGC, *FILENAME) );
+          dprint( ("RS=<%s>, FS=<%s>, AARGC=%g, FILENAME=%s\n",
+               *RS, *FS, *AARGC, *FILENAME) );
        if (isrecord) {
                donefld = 0;
                donerec = 1;
        }
        buf[0] = 0;
-       while (argno < *ARGC || infile == stdin) {
-                  dprintf( ("argno=%d, file=|%s|\n", argno, file) );
-               if (infile == NULL) {   /* have to open a new file */
+       while (argno < *AARGC || infile == &stdin) {
+                  dprint( ("argno=%d, file=|%s|\n", argno, file) );
+               if (infile == nil) {    /* have to open a new file */
                        file = getargv(argno);
                        if (*file == '\0') {    /* it's been zapped */
                                argno++;
@@ -132,10 +129,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord)      /* get next input record */
                                continue;
                        }
                        *FILENAME = file;
-                          dprintf( ("opening file %s\n", file) );
+                          dprint( ("opening file %s\n", file) );
                        if (*file == '-' && *(file+1) == '\0')
-                               infile = stdin;
-                       else if ((infile = fopen(file, "r")) == NULL)
+                               infile = &stdin;
+                       else if ((infile = Bopen(file, OREAD)) == nil)
                                FATAL("can't open file %s", file);
                        setfval(fnrloc, 0.0);
                }
@@ -157,10 +154,10 @@ int getrec(char **pbuf, int *pbufsize, int isrecord)      /* get next input record */
                        *pbufsize = bufsize;
                        return 1;
                }
-               /* EOF arrived on this file; set up next */
-               if (infile != stdin)
-                       fclose(infile);
-               infile = NULL;
+               /* Beof arrived on this file; set up next */
+               if (infile != &stdin)
+                       Bterm(infile);
+               infile = nil;
                argno++;
        }
        *pbuf = buf;
@@ -170,13 +167,13 @@ int getrec(char **pbuf, int *pbufsize, int isrecord)      /* get next input record */
 
 void nextfile(void)
 {
-       if (infile != stdin)
-               fclose(infile);
-       infile = NULL;
+       if (infile != &stdin)
+               Bterm(infile);
+       infile = nil;
        argno++;
 }
 
-int readrec(char **pbuf, int *pbufsize, FILE *inf)     /* read one record into buf */
+int readrec(char **pbuf, int *pbufsize, Biobuf *inf)   /* read one record into buf */
 {
        int sep, c;
        char *rr, *buf = *pbuf;
@@ -187,21 +184,21 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf)        /* read one record into buf *
        strcpy(inputFS, *FS);   /* for subsequent field splitting */
        if ((sep = **RS) == 0) {
                sep = '\n';
-               while ((c=getc(inf)) == '\n' && c != EOF)       /* skip leading \n's */
+               while ((c=Bgetc(inf)) == '\n' && c != Beof)     /* skip leading \n's */
                        ;
-               if (c != EOF)
-                       ungetc(c, inf);
+               if (c != Beof)
+                       Bungetc(inf);
        }
        for (rr = buf; ; ) {
-               for (; (c=getc(inf)) != sep && c != EOF; ) {
+               for (; (c=Bgetc(inf)) != sep && c != Beof; ) {
                        if (rr-buf+1 > bufsize)
                                if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
                                        FATAL("input record `%.30s...' too long", buf);
                        *rr++ = c;
                }
-               if (**RS == sep || c == EOF)
+               if (**RS == sep || c == Beof)
                        break;
-               if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+               if ((c = Bgetc(inf)) == '\n' || c == Beof) /* 2 in a row */
                        break;
                if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
                        FATAL("input record `%.30s...' too long", buf);
@@ -211,10 +208,10 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf)        /* read one record into buf *
        if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
                FATAL("input record `%.30s...' too long", buf);
        *rr = 0;
-          dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
+          dprint( ("readrec saw <%s>, returns %d\n", buf, c == Beof && rr == buf ? 0 : 1) );
        *pbuf = buf;
        *pbufsize = bufsize;
-       return c == EOF && rr == buf ? 0 : 1;
+       return c == Beof && rr == buf ? 0 : 1;
 }
 
 char *getargv(int n)   /* get ARGV[n] */
@@ -223,10 +220,10 @@ char *getargv(int n)      /* get ARGV[n] */
        char *s, temp[50];
        extern Array *ARGVtab;
 
-       sprintf(temp, "%d", n);
+       sprint(temp, "%d", n);
        x = setsymtab(temp, "", 0.0, STR, ARGVtab);
        s = getsval(x);
-          dprintf( ("getargv(%d) returns |%s|\n", n, s) );
+       dprint( ("getargv(%d) returns |%s|\n", n, s) );
        return s;
 }
 
@@ -245,7 +242,7 @@ void setclvar(char *s)      /* set var=value from s */
                q->fval = atof(q->sval);
                q->tval |= NUM;
        }
-          dprintf( ("command line set %s to |%s|\n", s, p) );
+          dprint( ("command line set %s to |%s|\n", s, p) );
 }
 
 
@@ -265,7 +262,7 @@ void fldbld(void)   /* create fields from current record */
        n = strlen(r);
        if (n > fieldssize) {
                xfree(fields);
-               if ((fields = (char *) malloc(n+1)) == NULL)
+               if ((fields = (char *) malloc(n+1)) == nil)
                        FATAL("out of space for fields in fldbld %d", n);
                fieldssize = n;
        }
@@ -273,7 +270,7 @@ void fldbld(void)   /* create fields from current record */
        i = 0;  /* number of fields accumulated here */
        if (strlen(inputFS) > 1) {      /* it's a regular expression */
                i = refldbld(r, inputFS);
-       } else if ((sep = *inputFS) == ' ') {   /* default whitespace */
+       } else if (*inputFS == ' ') {   /* default whitespace */
                for (i = 0; ; ) {
                        while (*r == ' ' || *r == '\t' || *r == '\n')
                                r++;
@@ -339,7 +336,7 @@ void fldbld(void)   /* create fields from current record */
        if (dbg) {
                for (j = 0; j <= lastfld; j++) {
                        p = fldtab[j];
-                       printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
+                       print("field %d (%s): |%s|\n", j, p->nval, p->sval);
                }
        }
 }
@@ -383,7 +380,7 @@ void growfldtab(int n)      /* make new fields up to at least $n */
        if (n > nf)
                nf = n;
        fldtab = (Cell **) realloc(fldtab, (nf+1) * (sizeof (struct Cell *)));
-       if (fldtab == NULL)
+       if (fldtab == nil)
                FATAL("out of space creating %d fields", nf);
        makefields(nfields+1, nf);
        nfields = nf;
@@ -395,12 +392,12 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
        /* the fields are all stored in this one array with \0's */
        char *fr;
        void *p;
-       int i, tempstat, n;
+       int i, n;
 
        n = strlen(rec);
        if (n > fieldssize) {
                xfree(fields);
-               if ((fields = (char *) malloc(n+1)) == NULL)
+               if ((fields = (char *) malloc(n+1)) == nil)
                        FATAL("out of space for fields in refldbld %d", n);
                fieldssize = n;
        }
@@ -409,7 +406,7 @@ int refldbld(char *rec, char *fs)   /* build fields from reg expr in FS */
        if (*rec == '\0')
                return 0;
        p = compre(fs);
-          dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
+          dprint( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
        for (i = 1; ; i++) {
                if (i > nfields)
                        growfldtab(i);
@@ -417,15 +414,15 @@ int refldbld(char *rec, char *fs) /* build fields from reg expr in FS */
                        xfree(fldtab[i]->sval);
                fldtab[i]->tval = FLD | STR | DONTFREE;
                fldtab[i]->sval = fr;
-                  dprintf( ("refldbld: i=%d\n", i) );
+                  dprint( ("refldbld: i=%d\n", i) );
                if (nematch(p, rec, rec)) {
-                          dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
+                          dprint( ("match %s (%d chars)\n", patbeg, patlen) );
                        strncpy(fr, rec, patbeg-rec);
                        fr += patbeg - rec + 1;
                        *(fr-1) = '\0';
                        rec = patbeg + patlen;
                } else {
-                          dprintf( ("no match %s\n", rec) );
+                          dprint( ("no match %s\n", rec) );
                        strcpy(fr, rec);
                        break;
                }
@@ -457,15 +454,15 @@ void recbld(void) /* create $0 from $1..$NF if necessary */
        if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
                FATAL("built giant record `%.30s...'", record);
        *r = '\0';
-          dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
+          dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
 
        if (freeable(fldtab[0]))
                xfree(fldtab[0]->sval);
        fldtab[0]->tval = REC | STR | DONTFREE;
        fldtab[0]->sval = record;
 
-          dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
-          dprintf( ("recbld = |%s|\n", record) );
+          dprint( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, fldtab[0]) );
+          dprint( ("recbld = |%s|\n", record) );
        donerec = 1;
 }
 
@@ -484,24 +481,26 @@ void SYNTAX(char *fmt, ...)
 
        if (been_here++ > 2)
                return;
-       fprintf(stderr, "%s: ", cmdname);
+       Bprint(&stderr, "%s: ", cmdname);
        va_start(varg, fmt);
-       vfprintf(stderr, fmt, varg);
+       Bvprint(&stderr, fmt, varg);
        va_end(varg);
-       if(compile_time == 1 && cursource() != NULL)
-               fprintf(stderr, " at %s:%d", cursource(), lineno);
+       if(compile_time == 1 && cursource() != nil)
+               Bprint(&stderr, " at %s:%d", cursource(), lineno);
        else
-               fprintf(stderr, " at line %d", lineno);
-       if (curfname != NULL)
-               fprintf(stderr, " in function %s", curfname);
-       fprintf(stderr, "\n");
+               Bprint(&stderr, " at line %d", lineno);
+       if (curfname != nil)
+               Bprint(&stderr, " in function %s", curfname);
+       Bprint(&stderr, "\n");
        errorflag = 2;
        eprint();
 }
 
-void fpecatch(int n)
+int handler(void *, char *err)
 {
-       FATAL("floating point exception %d", n);
+       Bflush(&stdout);
+       fprint(2, "%s\n", err);
+       return 0;
 }
 
 extern int bracecnt, brackcnt, parencnt;
@@ -513,23 +512,23 @@ void bracecheck(void)
 
        if (beenhere++)
                return;
-       while ((c = input()) != EOF && c != '\0')
+       while ((c = input()) != Beof && c != '\0')
                bclass(c);
        bcheck2(bracecnt, '{', '}');
        bcheck2(brackcnt, '[', ']');
        bcheck2(parencnt, '(', ')');
 }
 
-void bcheck2(int n, int c1, int c2)
+void bcheck2(int n, int, int c2)
 {
        if (n == 1)
-               fprintf(stderr, "\tmissing %c\n", c2);
+               Bprint(&stderr, "\tmissing %c\n", c2);
        else if (n > 1)
-               fprintf(stderr, "\t%d missing %c's\n", n, c2);
+               Bprint(&stderr, "\t%d missing %c's\n", n, c2);
        else if (n == -1)
-               fprintf(stderr, "\textra %c\n", c2);
+               Bprint(&stderr, "\textra %c\n", c2);
        else if (n < -1)
-               fprintf(stderr, "\t%d extra %c's\n", -n, c2);
+               Bprint(&stderr, "\t%d extra %c's\n", -n, c2);
 }
 
 void FATAL(char *fmt, ...)
@@ -537,15 +536,15 @@ void FATAL(char *fmt, ...)
        extern char *cmdname;
        va_list varg;
 
-       fflush(stdout);
-       fprintf(stderr, "%s: ", cmdname);
+       Bflush(&stdout);
+       Bprint(&stderr, "%s: ", cmdname);
        va_start(varg, fmt);
-       vfprintf(stderr, fmt, varg);
+       Bvprint(&stderr, fmt, varg);
        va_end(varg);
        error();
        if (dbg > 1)            /* core dump if serious debugging on */
                abort();
-       exit(2);
+       exits("FATAL");
 }
 
 void WARNING(char *fmt, ...)
@@ -553,10 +552,10 @@ void WARNING(char *fmt, ...)
        extern char *cmdname;
        va_list varg;
 
-       fflush(stdout);
-       fprintf(stderr, "%s: ", cmdname);
+       Bflush(&stdout);
+       Bprint(&stderr, "%s: ", cmdname);
        va_start(varg, fmt);
-       vfprintf(stderr, fmt, varg);
+       Bvprint(&stderr, fmt, varg);
        va_end(varg);
        error();
 }
@@ -566,13 +565,13 @@ void error()
        extern Node *curnode;
        int line;
 
-       fprintf(stderr, "\n");
+       Bprint(&stderr, "\n");
        if (compile_time != 2 && NR && *NR > 0) {
                if (strcmp(*FILENAME, "-") != 0)
-                       fprintf(stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
+                       Bprint(&stderr, " input record %s:%d", *FILENAME, (int) (*FNR));
                else
-                       fprintf(stderr, " input record number %d", (int) (*FNR));
-               fprintf(stderr, "\n");
+                       Bprint(&stderr, " input record number %d", (int) (*FNR));
+               Bprint(&stderr, "\n");
        }
        if (compile_time != 2 && curnode)
                line = curnode->lineno;
@@ -580,14 +579,14 @@ void error()
                line = lineno;
        else
                line = -1;
-       if (compile_time == 1 && cursource() != NULL){
+       if (compile_time == 1 && cursource() != nil){
                if(line >= 0)
-                       fprintf(stderr, " source %s:%d", cursource(), line);
+                       Bprint(&stderr, " source %s:%d", cursource(), line);
                else
-                       fprintf(stderr, " source file %s", cursource());
+                       Bprint(&stderr, " source file %s", cursource());
        }else if(line >= 0)
-               fprintf(stderr, " source line %d", line);
-       fprintf(stderr, "\n");
+               Bprint(&stderr, " source line %d", line);
+       Bprint(&stderr, "\n");
        eprint();
 }
 
@@ -607,23 +606,23 @@ void eprint(void) /* try to print context around error */
                ;
        while (*p == '\n')
                p++;
-       fprintf(stderr, " context is\n\t");
+       Bprint(&stderr, " context is\n\t");
        for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
                ;
        for ( ; p < q; p++)
                if (*p)
-                       putc(*p, stderr);
-       fprintf(stderr, " >>> ");
+                       Bputc(&stderr, *p);
+       Bprint(&stderr, " >>> ");
        for ( ; p < ep; p++)
                if (*p)
-                       putc(*p, stderr);
-       fprintf(stderr, " <<< ");
+                       Bputc(&stderr, *p);
+       Bprint(&stderr, " <<< ");
        if (*ep)
-               while ((c = input()) != '\n' && c != '\0' && c != EOF) {
-                       putc(c, stderr);
+               while ((c = input()) != '\n' && c != '\0' && c != Beof) {
+                       Bputc(&stderr, c);
                        bclass(c);
                }
-       putc('\n', stderr);
+       Bputc(&stderr, '\n');
        ep = ebuf;
 }
 
@@ -642,12 +641,10 @@ void bclass(int c)
 double errcheck(double x, char *s)
 {
 
-       if (errno == EDOM) {
-               errno = 0;
+       if (isNaN(x)) {
                WARNING("%s argument out of domain", s);
                x = 1;
-       } else if (errno == ERANGE) {
-               errno = 0;
+       } else if (isInf(x, 1) || isInf(x, -1)) {
                WARNING("%s result out of range", s);
                x = 1;
        }
@@ -668,7 +665,6 @@ int isclvar(char *s)        /* is s of form var=something ? */
 
 /* strtod is supposed to be a proper test of what's a valid number */
 
-#include <math.h>
 int is_number(char *s)
 {
        double r;
@@ -699,9 +695,8 @@ int is_number(char *s)
                return 0;       /* can't be a number */
        }
 
-       errno = 0;
        r = strtod(s, &ep);
-       if (ep == s || r == HUGE_VAL || errno == ERANGE)
+       if (ep == s || isInf(r, 1) || isInf(r, -1))
                return 0;
        while (*ep == ' ' || *ep == '\t' || *ep == '\n')
                ep++;
index 532cc1f649e890b7da83c6ea4462deff38893a8c..3d1ae4e0dba0762f8ceae9d1ad0a27d0a2a0ac65 100644 (file)
@@ -24,21 +24,21 @@ THIS SOFTWARE.
 
 char   *version = "version 19990602";
 
-#define DEBUG
-#include <stdio.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <string.h>
-#include <signal.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
-extern char    **environ;
 extern int     nfields;
 
+Biobuf stdin;
+Biobuf stdout;
+Biobuf stderr;
+
 int    dbg     = 0;
 char   *cmdname;       /* gets argv[0] for error messages */
-extern FILE    *yyin;  /* lex input file */
+extern Biobuf  *yyin;  /* lex input file */
 char   *lexprog;       /* points to program argument if it exists */
 extern int errorflag;  /* non-zero if any syntax errors; set by yyerror */
 int    compile_time = 2;       /* for error printing: */
@@ -50,18 +50,23 @@ int curpfile = 0;   /* current filename */
 
 int    safe    = 0;    /* 1 => "safe" mode */
 
-int main(int argc, char *argv[])
+void main(int argc, char *argv[])
 {
-       char *fs = NULL, *marg;
+       char *fs = nil, *marg;
        int temp;
 
+       Binit(&stdin, 0, OREAD);
+       Binit(&stdout, 1, OWRITE);
+       Binit(&stderr, 2, OWRITE);
+
        cmdname = argv[0];
        if (argc == 1) {
-               fprintf(stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
-               exit(1);
+               Bprint(&stderr, "Usage: %s [-F fieldsep] [-mf n] [-mr n] [-v var=value] [-f programfile | 'program'] [file ...]\n", cmdname);
+               exits("usage");
        }
-       signal(SIGFPE, fpecatch);
-       yyin = NULL;
+
+       atnotify(handler, 1);
+       yyin = nil;
        symtab = makesymtab(NSYMTAB);
        while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
                if (strcmp(argv[1], "--") == 0) {       /* explicit end of args */
@@ -94,7 +99,7 @@ int main(int argc, char *argv[])
                                else if (argc > 1 && argv[1][0] != 0)
                                        fs = &argv[1][0];
                        }
-                       if (fs == NULL || *fs == '\0')
+                       if (fs == nil || *fs == '\0')
                                WARNING("field separator FS is empty");
                        break;
                case 'v':       /* -v a=1 to be done NOW.  one -v for each */
@@ -120,11 +125,11 @@ int main(int argc, char *argv[])
                        dbg = atoi(&argv[1][2]);
                        if (dbg == 0)
                                dbg = 1;
-                       printf("awk %s\n", version);
+                       print("awk %s\n", version);
                        break;
                case 'V':       /* added for exptools "standard" */
-                       printf("awk %s\n", version);
-                       exit(0);
+                       print("awk %s\n", version);
+                       exits(0);
                        break;
                default:
                        WARNING("unknown option %s ignored", argv[1]);
@@ -137,10 +142,10 @@ int main(int argc, char *argv[])
        if (npfile == 0) {      /* no -f; first argument is program */
                if (argc <= 1) {
                        if (dbg)
-                               exit(0);
+                               exits(0);
                        FATAL("no program given");
                }
-                  dprintf( ("program = |%s|\n", argv[1]) );
+                  dprint( ("program = |%s|\n", argv[1]) );
                lexprog = argv[1];
                argc--;
                argv++;
@@ -149,20 +154,20 @@ int main(int argc, char *argv[])
        syminit();
        compile_time = 1;
        argv[0] = cmdname;      /* put prog name at front of arglist */
-          dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
+          dprint( ("argc=%d, argv[0]=%s\n", argc, argv[0]) );
        arginit(argc, argv);
-       if (!safe)
-               envinit(environ);
        yyparse();
        if (fs)
                *FS = qstring(fs, '\0');
-          dprintf( ("errorflag=%d\n", errorflag) );
+          dprint( ("errorflag=%d\n", errorflag) );
        if (errorflag == 0) {
                compile_time = 0;
                run(winner);
        } else
                bracecheck();
-       return(errorflag);
+       if(errorflag)
+               exits("error");
+       exits(0);
 }
 
 int pgetc(void)                /* get 1 character from awk program */
@@ -170,20 +175,20 @@ int pgetc(void)           /* get 1 character from awk program */
        int c;
 
        for (;;) {
-               if (yyin == NULL) {
+               if (yyin == nil) {
                        if (curpfile >= npfile)
-                               return EOF;
+                               return Beof;
                        if (strcmp(pfile[curpfile], "-") == 0)
-                               yyin = stdin;
-                       else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
+                               yyin = &stdin;
+                       else if ((yyin = Bopen(pfile[curpfile], OREAD)) == nil)
                                FATAL("can't open file %s", pfile[curpfile]);
                        lineno = 1;
                }
-               if ((c = getc(yyin)) != EOF)
+               if ((c = Bgetc(yyin)) != Beof)
                        return c;
-               if (yyin != stdin)
-                       fclose(yyin);
-               yyin = NULL;
+               if (yyin != &stdin)
+                       Bterm(yyin);
+               yyin = nil;
                curpfile++;
        }
 }
@@ -193,5 +198,5 @@ char *cursource(void)       /* current source file name */
        if (npfile > 0)
                return pfile[curpfile];
        else
-               return NULL;
+               return nil;
 }
index 13d34e96e0d0c2d2f960ad68cfa589cf27b7f797..70e214a50cd0eff0152072ad263b63828c6d29ef 100644 (file)
@@ -28,9 +28,9 @@ THIS SOFTWARE.
  * it finds the indices in y.tab.h, produced by yacc.
  */
 
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
@@ -39,7 +39,7 @@ struct xx
        char *name;
        char *pname;
 } proc[] = {
-       { PROGRAM, "program", NULL },
+       { PROGRAM, "program", nil },
        { BOR, "boolop", " || " },
        { AND, "boolop", " && " },
        { NOT, "boolop", " !" },
@@ -49,13 +49,13 @@ struct xx
        { LT, "relop", " < " },
        { GE, "relop", " >= " },
        { GT, "relop", " > " },
-       { ARRAY, "array", NULL },
+       { ARRAY, "array", nil },
        { INDIRECT, "indirect", "$(" },
        { SUBSTR, "substr", "substr" },
        { SUB, "sub", "sub" },
        { GSUB, "gsub", "gsub" },
        { INDEX, "sindex", "sindex" },
-       { SPRINTF, "awksprintf", "sprintf " },
+       { SPRINTF, "awksprintf", "sprintf" },
        { ADD, "arith", " + " },
        { MINUS, "arith", " - " },
        { MULT, "arith", " * " },
@@ -68,8 +68,8 @@ struct xx
        { PREDECR, "incrdecr", "--" },
        { POSTDECR, "incrdecr", "--" },
        { CAT, "cat", " " },
-       { PASTAT, "pastat", NULL },
-       { PASTAT2, "dopa2", NULL },
+       { PASTAT, "pastat", nil },
+       { PASTAT2, "dopa2", nil },
        { MATCH, "matchop", " ~ " },
        { NOTMATCH, "matchop", " !~ " },
        { MATCHFCN, "matchop", "matchop" },
@@ -110,59 +110,62 @@ struct xx
 char *table[SIZE];
 char *names[SIZE];
 
-int main(int argc, char *argv[])
+void main(int, char**)
 {
        struct xx *p;
-       int i, n, tok;
-       char c;
-       FILE *fp;
-       char buf[200], name[200], def[200];
+       int i, tok;
+       Biobuf *fp;
+       char *buf, *toks[3];
 
-       printf("#include <stdio.h>\n");
-       printf("#include \"awk.h\"\n");
-       printf("#include \"y.tab.h\"\n\n");
+       print("#include <u.h>\n");
+       print("#include <libc.h>\n");
+       print("#include <bio.h>\n");
+       print("#include \"awk.h\"\n");
+       print("#include \"y.tab.h\"\n\n");
        for (i = SIZE; --i >= 0; )
                names[i] = "";
 
-       if ((fp = fopen("y.tab.h", "r")) == NULL) {
-               fprintf(stderr, "maketab can't open y.tab.h!\n");
-               exit(1);
+       if ((fp = Bopen("y.tab.h", OREAD)) == nil) {
+               fprint(2, "maketab can't open y.tab.h!\n");
+               exits("can't open y.tab.h");
        }
-       printf("static char *printname[%d] = {\n", SIZE);
+       print("static char *printname[%d] = {\n", SIZE);
        i = 0;
-       while (fgets(buf, sizeof buf, fp) != NULL) {
-               n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
-               if (c != '#' || (n != 4 && strcmp(def,"define") != 0))  /* not a valid #define */
+       while ((buf = Brdline(fp, '\n')) != nil) {
+               buf[Blinelen(fp)-1] = '\0';
+               tokenize(buf, toks, 3);
+               if (toks[0] == nil || strcmp("#define", toks[0]) != 0)  /* not a valid #define */
                        continue;
+               tok = strtol(toks[2], nil, 10);
                if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
-                       fprintf(stderr, "maketab funny token %d %s\n", tok, buf);
-                       exit(1);
+                       fprint(2, "maketab funny token %d %s\n", tok, buf);
+                       exits("funny token");
                }
-               names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1);
-               strcpy(names[tok-FIRSTTOKEN], name);
-               printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
+               names[tok-FIRSTTOKEN] = (char *) malloc(strlen(toks[1])+1);
+               strcpy(names[tok-FIRSTTOKEN], toks[1]);
+               print("\t(char *) \"%s\",\t/* %d */\n", toks[1], tok);
                i++;
        }
-       printf("};\n\n");
+       print("};\n\n");
 
        for (p=proc; p->token!=0; p++)
                table[p->token-FIRSTTOKEN] = p->name;
-       printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
+       print("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
        for (i=0; i<SIZE; i++)
                if (table[i]==0)
-                       printf("\tnullproc,\t/* %s */\n", names[i]);
+                       print("\tnullproc,\t/* %s */\n", names[i]);
                else
-                       printf("\t%s,\t/* %s */\n", table[i], names[i]);
-       printf("};\n\n");
+                       print("\t%s,\t/* %s */\n", table[i], names[i]);
+       print("};\n\n");
 
-       printf("char *tokname(int n)\n");       /* print a tokname() function */
-       printf("{\n");
-       printf("        static char buf[100];\n\n");
-       printf("        if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
-       printf("                sprintf(buf, \"token %%d\", n);\n");
-       printf("                return buf;\n");
-       printf("        }\n");
-       printf("        return printname[n-FIRSTTOKEN];\n");
-       printf("}\n");
-       return 0;
+       print("char *tokname(int n)\n");        /* print a tokname() function */
+       print("{\n");
+       print(" static char buf[100];\n\n");
+       print(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
+       print("         sprint(buf, \"token %%d\", n);\n");
+       print("         return buf;\n");
+       print(" }\n");
+       print(" return printname[n-FIRSTTOKEN];\n");
+       print("}\n");
+       exits(0);
 }
index 35ee78d7cc593a7663fea4f2723a8654b3ca2ff8..9e30f01481dab5d1e50b3230cbbc39f6473abbc5 100644 (file)
@@ -6,6 +6,7 @@ OFILES=re.$O\
        main.$O\
        parse.$O\
        proctab.$O\
+       popen.$O\
        tran.$O\
        lib.$O\
        run.$O\
@@ -28,11 +29,6 @@ UPDATE=\
        ${TARG:%=/386/bin/%}\
 
 </sys/src/cmd/mkone
-CFLAGS=-c -D_REGEXP_EXTENSION -D_RESEARCH_SOURCE -D_BSD_EXTENSION -DUTF
-YFLAGS=-S -d -v
-CC=pcc
-LD=pcc
-cpuobjtype=`{sed -n 's/^O=//p' /$cputype/mkfile}
 
 y.tab.h awkgram.c:     $YFILES
        $YACC -o awkgram.c $YFLAGS $prereq
@@ -43,10 +39,10 @@ clean:V:
 nuke:V:
        rm -f *.[$OS] [$OS].out [$OS].maketab y.tab.? y.debug y.output awkgram.c proctab.c $TARG
 
-proctab.c:     $cpuobjtype.maketab
-       ./$cpuobjtype.maketab >proctab.c
+proctab.c:     $O.maketab
+       ./$O.maketab >proctab.c
 
-$cpuobjtype.maketab:   y.tab.h maketab.c
+$O.maketab:    y.tab.h maketab.c
        objtype=$cputype
        mk maketab.$cputype
 
index 2ee6eca621314b74a671f6a6237c3be5f0bbc422..3e42d2ba7f0e7a5ea45c1a1201349f9f8a190b70 100644 (file)
@@ -22,10 +22,9 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-#define DEBUG
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
@@ -34,9 +33,9 @@ Node *nodealloc(int n)
        Node *x;
 
        x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *));
-       if (x == NULL)
+       if (x == nil)
                FATAL("out of space in nodealloc");
-       x->nnext = NULL;
+       x->nnext = nil;
        x->lineno = lineno;
        return(x);
 }
@@ -220,11 +219,11 @@ Node *linkum(Node *a, Node *b)
 
        if (errorflag)  /* don't link things that are wrong */
                return a;
-       if (a == NULL)
+       if (a == nil)
                return(b);
-       else if (b == NULL)
+       else if (b == nil)
                return(a);
-       for (c = a; c->nnext != NULL; c = c->nnext)
+       for (c = a; c->nnext != nil; c = c->nnext)
                ;
        c->nnext = b;
        return(a);
@@ -245,7 +244,7 @@ void defn(Cell *v, Node *vl, Node *st)      /* turn on FCN bit in definition, */
        for (p = vl; p; p = p->nnext)
                n++;
        v->fval = n;
-       dprintf( ("defining func %s (%d args)\n", v->nval, n) );
+       dprint( ("defining func %s (%d args)\n", v->nval, n) );
 }
 
 int isarg(char *s)             /* is s in argument list for current function? */
@@ -262,7 +261,7 @@ int isarg(char *s)          /* is s in argument list for current function? */
 
 int ptoi(void *p)      /* convert pointer to integer */
 {
-       return (int) (long) p;  /* swearing that p fits, of course */
+       return (int) (vlong) p; /* swearing that p fits, of course */
 }
 
 Node *itonp(int i)     /* and vice versa */
diff --git a/sys/src/cmd/awk/popen.c b/sys/src/cmd/awk/popen.c
new file mode 100644 (file)
index 0000000..bcb769e
--- /dev/null
@@ -0,0 +1,91 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "awk.h"
+
+#define MAXFORKS       20
+#define NSYSFILE       3
+#define        tst(a,b)        (mode == OREAD? (b) : (a))
+#define        RDR     0
+#define        WTR     1
+
+struct a_fork {
+       short   done;
+       short   fd;
+       int     pid;
+       char status[128];
+};
+static struct a_fork the_fork[MAXFORKS];
+
+Biobuf*
+popen(char *cmd, int mode)
+{
+       int p[2];
+       int myside, hisside, pid;
+       int i, ind;
+
+       for (ind = 0; ind < MAXFORKS; ind++)
+               if (the_fork[ind].pid == 0)
+                       break;
+       if (ind == MAXFORKS)
+               return nil;
+       if(pipe(p) < 0)
+               return nil;
+       myside = tst(p[WTR], p[RDR]);
+       hisside = tst(p[RDR], p[WTR]);
+       switch (pid = fork()) {
+       case -1:
+               return nil;
+       case 0:
+               /* myside and hisside reverse roles in child */
+               close(myside);
+               dup(hisside, tst(0, 1));
+               for (i=NSYSFILE; i<FOPEN_MAX; i++)
+                       close(i);
+               execl("/bin/rc", "rc", "-c", cmd, nil);
+               exits("exec failed");
+       default:
+               the_fork[ind].pid = pid;
+               the_fork[ind].fd = myside;
+               the_fork[ind].done = 0;
+               close(hisside);
+               return(Bfdopen(myside, mode));
+       }
+}
+
+int
+pclose(Biobuf *ptr)
+{
+       int f, r, ind;
+       Waitmsg *status;
+
+       f = Bfildes(ptr);
+       Bterm(ptr);
+       for (ind = 0; ind < MAXFORKS; ind++)
+               if (the_fork[ind].fd == f && the_fork[ind].pid != 0)
+                       break;
+       if (ind == MAXFORKS)
+               return -1;
+       if (!the_fork[ind].done) {
+               do {
+                       if((status = wait()) == nil)
+                               r = -1;
+                       else
+                               r = status->pid;
+                       for (f = 0; f < MAXFORKS; f++) {
+                               if (r == the_fork[f].pid) {
+                                       the_fork[f].done = 1;
+                                       strecpy(the_fork[f].status, the_fork[f].status+512, status->msg);
+                                       break;
+                               }
+                       }
+                       free(status);
+               } while(r != the_fork[ind].pid && r != -1);
+               if(r == -1)
+                       strcpy(the_fork[ind].status, "No loved ones to wait for");
+       }
+       the_fork[ind].pid = 0;
+       if(the_fork[ind].status[0] != '\0')
+               return 1;
+       return 0;
+}
index 4566dd66c9d9884880e31c8a9b55e5dd31008075..6b14eadd121482887a52c51bcdc88dabf1033d9e 100644 (file)
@@ -44,7 +44,6 @@ extern        void    quoted(char **, char **, char *);
 extern int     match(void *, char *, char *);
 extern int     pmatch(void *, char *, char *);
 extern int     nematch(void *, char *, char *);
-extern int     countposn(char *, int);
 extern void    overflow(void);
 
 extern int     pgetc(void);
@@ -100,7 +99,7 @@ extern       void    makefields(int, int);
 extern void    growfldtab(int n);
 extern int     getrec(char **, int *, int);
 extern void    nextfile(void);
-extern int     readrec(char **buf, int *bufsize, FILE *inf);
+extern int     readrec(char **buf, int *bufsize, Biobuf *inf);
 extern char    *getargv(int);
 extern void    setclvar(char *);
 extern void    fldbld(void);
@@ -110,7 +109,7 @@ extern      int     refldbld(char *, char *);
 extern void    recbld(void);
 extern Cell    *fieldadr(int);
 extern void    yyerror(char *);
-extern void    fpecatch(int);
+extern int     handler(void*, char*);
 extern void    bracecheck(void);
 extern void    bcheck2(int, int, int);
 extern void    SYNTAX(char *, ...);
@@ -165,13 +164,13 @@ extern    Cell    *instat(Node **, int);
 extern Cell    *bltin(Node **, int);
 extern Cell    *printstat(Node **, int);
 extern Cell    *nullproc(Node **, int);
-extern FILE    *redirect(int, Node *);
-extern FILE    *openfile(int, char *);
-extern char    *filename(FILE *);
+extern Biobuf  *redirect(int, Node *);
+extern Biobuf  *openfile(int, char *);
+extern char    *filename(Biobuf *);
 extern Cell    *closefile(Node **, int);
 extern void    closeall(void);
 extern Cell    *sub(Node **, int);
 extern Cell    *gsub(Node **, int);
 
-extern FILE    *popen(const char *, const char *);
-extern int     pclose(FILE *);
+extern Biobuf  *popen(char *, int);
+extern int     pclose(Biobuf *);
index 32fe2af4753099ffdd4fad84278700f35d21e75f..f93a8dd45741b8b6bc37e212b3ede7372d069a21 100644 (file)
@@ -22,18 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-
-#define DEBUG
-#include <stdio.h>
+#include <u.h>
+#include <libc.h>
 #include <ctype.h>
-#include <setjmp.h>
-#include <math.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
+#include <bio.h>
+#include <regexp.h>
 #include "awk.h"
 #include "y.tab.h"
-#include "regexp.h"
 
        /* This file provides the interface between the main body of
         * awk and the pattern matching package.  It preprocesses
@@ -198,11 +193,11 @@ pmatch(void *p, char *s, char *start)
 {
        Resub m;
 
-       m.s.sp = start;
-       m.e.ep = 0;
+       m.sp = start;
+       m.ep = 0;
        if (regexec((Reprog *) p, (char *) s, &m, 1)) {
-               patbeg = m.s.sp;
-               patlen = m.e.ep-m.s.sp;
+               patbeg = m.sp;
+               patlen = m.ep-m.sp;
                return 1;
        }
        patlen = -1;
@@ -250,7 +245,7 @@ quoted(char **s, char **to, char *end)      /* handle escaped sequence */
 {
        char *p = *s;
        char *t = *to;
-       wchar_t c;
+       Rune c;
 
        switch(c = *p++) {
        case 't':
@@ -273,8 +268,8 @@ quoted(char **s, char **to, char *end)      /* handle escaped sequence */
                        *t++ = '\\';
                if (c == 'x') {         /* hexadecimal goo follows */
                        c = hexstr(&p);
-                       if (t < end-MB_CUR_MAX)
-                               t += wctomb(t, c);
+                       if (t < end-UTFmax)
+                               t += runelen(c);
                        else overflow();
                        *to = t;
                        *s = p;
@@ -294,21 +289,6 @@ quoted(char **s, char **to, char *end)     /* handle escaped sequence */
        *s = p;
        *to = t;
 }
-       /* count rune positions */
-int
-countposn(char *s, int n)
-{
-       int i, j;
-       char *end;
-
-       for (i = 0, end = s+n; *s && s < end; i++){
-               j = mblen(s, n);
-               if(j <= 0)
-                       j = 1;
-               s += j;
-       }
-       return(i);
-}
 
        /* pattern package error handler */
 
index a8c03f1b5f23810771f3c7b6aaacd58a82431ca9..c9e90bc451296bb1c9e9e4a98282e24f9fe29a6d 100644 (file)
@@ -22,43 +22,13 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-#define DEBUG
-#include <stdio.h>
+#include <u.h>
+#include <libc.h>
 #include <ctype.h>
-#include <setjmp.h>
-#include <math.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-#include <utf.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
-#define tempfree(x)    if (istemp(x)) tfree(x); else
-
-/*
-#undef tempfree
-
-void tempfree(Cell *p) {
-       if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
-               WARNING("bad csub %d in Cell %d %s",
-                       p->csub, p->ctype, p->sval);
-       }
-       if (istemp(p))
-               tfree(p);
-}
-*/
-
-#ifdef _NFILE
-#ifndef FOPEN_MAX
-#define FOPEN_MAX _NFILE
-#endif
-#endif
-
-#ifndef        FOPEN_MAX
-#define        FOPEN_MAX       40      /* max number of open files */
-#endif
-
 #ifndef RAND_MAX
 #define RAND_MAX       32767   /* all that ansi guarantees */
 #endif
@@ -66,7 +36,7 @@ void tempfree(Cell *p) {
 jmp_buf env;
 extern int     pairstack[];
 
-Node   *winner = NULL; /* root of parse tree */
+Node   *winner = nil;  /* root of parse tree */
 Cell   *tmps;          /* free temporary cells for execution */
 
 static Cell    truecell        ={ OBOOL, BTRUE, 0, 0, 1.0, NUM };
@@ -87,7 +57,42 @@ static Cell  retcell         ={ OJUMP, JRET, 0, 0, 0.0, NUM };
 Cell   *jret   = &retcell;
 static Cell    tempcell        ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE };
 
-Node   *curnode = NULL;        /* the node being executed, for debugging */
+Node   *curnode = nil; /* the node being executed, for debugging */
+
+int
+system(const char *s)
+{
+       char status[512], *statfld[5];
+       int w, pid;
+       char cmd[30], *oty;
+
+       oty = getenv("cputype");
+       if(!oty)
+               return -1;
+       if(!s)
+               return 1; /* a command interpreter is available */
+       pid = fork();
+       snprint(cmd, sizeof cmd, "/%s/bin/ape/sh", oty);
+       if(pid == 0) {
+               execl(cmd, "sh", "-c", s, nil);
+               exits("exec");
+       }
+       if(pid < 0){
+               return -1;
+       }
+       for(;;) {
+               w = await(status, sizeof(status) - 1);
+               if(w == -1)
+                       return -1;
+               tokenize(status, statfld, nelem(statfld));
+               if(strtol(statfld[0], nil, 0) == pid)
+                       break;
+       }
+
+       if(*statfld[4] != '\0')
+               return 1;
+       return 0;
+}
 
 /* buffer memory management */
 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
@@ -110,7 +115,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
                if (rminlen)
                        minlen += quantum - rminlen;
                tbuf = (char *) realloc(*pbuf, minlen);
-               if (tbuf == NULL) {
+               if (tbuf == nil) {
                        if (whatrtn)
                                FATAL("out of memory in %s", whatrtn);
                        return 0;
@@ -139,7 +144,7 @@ Cell *execute(Node *u)      /* execute a node of the parse tree */
        Cell *x;
        Node *a;
 
-       if (u == NULL)
+       if (u == nil)
                return(True);
        for (a = u; ; a = a->nnext) {
                curnode = a;
@@ -164,14 +169,15 @@ Cell *execute(Node *u)    /* execute a node of the parse tree */
                        return(x);
                if (isjump(x))
                        return(x);
-               if (a->nnext == NULL)
+               if (a->nnext == nil)
                        return(x);
-               tempfree(x);
+               if(istemp(x))
+                       tfree(x);
        }
 }
 
 
-Cell *program(Node **a, int n) /* execute an awk program */
+Cell *program(Node **a, int)   /* execute an awk program */
 {                              /* a[0] = BEGIN, a[1] = body, a[2] = END */
        Cell *x;
 
@@ -183,14 +189,16 @@ Cell *program(Node **a, int n)    /* execute an awk program */
                        return(True);
                if (isjump(x))
                        FATAL("illegal break, continue, next or nextfile from BEGIN");
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
        }
        if (a[1] || a[2])
                while (getrec(&record, &recsize, 1) > 0) {
                        x = execute(a[1]);
                        if (isexit(x))
                                break;
-                       tempfree(x);
+                       if (istemp(x))
+                               tfree(x);
                }
   ex:
        if (setjmp(env) != 0)   /* handles exit within END */
@@ -199,7 +207,8 @@ Cell *program(Node **a, int n)      /* execute an awk program */
                x = execute(a[2]);
                if (isbreak(x) || isnext(x) || iscont(x))
                        FATAL("illegal break, continue, next or nextfile from END");
-               tempfree(x);
+                       if (istemp(x))
+                               tfree(x);
        }
   ex1:
        return(True);
@@ -214,11 +223,11 @@ struct Frame {    /* stack frame for awk function calls */
 
 #define        NARGS   50      /* max args in a call */
 
-struct Frame *frame = NULL;    /* base of stack frames; dynamically allocated */
+struct Frame *frame = nil;     /* base of stack frames; dynamically allocated */
 int    nframe = 0;             /* number of frames allocated */
-struct Frame *fp = NULL;       /* frame pointer. bottom level unused */
+struct Frame *fp = nil;        /* frame pointer. bottom level unused */
 
-Cell *call(Node **a, int n)    /* function call.  very kludgy and fragile */
+Cell *call(Node **a, int)      /* function call.  very kludgy and fragile */
 {
        static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE };
        int i, ncall, ndef;
@@ -231,25 +240,25 @@ Cell *call(Node **a, int n)       /* function call.  very kludgy and fragile */
        s = fcn->nval;
        if (!isfcn(fcn))
                FATAL("calling undefined function %s", s);
-       if (frame == NULL) {
+       if (frame == nil) {
                fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame));
-               if (frame == NULL)
+               if (frame == nil)
                        FATAL("out of space for stack frames calling %s", s);
        }
-       for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)      /* args in call */
+       for (ncall = 0, x = a[1]; x != nil; x = x->nnext)       /* args in call */
                ncall++;
        ndef = (int) fcn->fval;                 /* args in defn */
-          dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
+          dprint( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) );
        if (ncall > ndef)
                WARNING("function %s called with %d args, uses only %d",
                        s, ncall, ndef);
        if (ncall + ndef > NARGS)
                FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
-       for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {   /* get call args */
-                  dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
+       for (i = 0, x = a[1]; x != nil; i++, x = x->nnext) {    /* get call args */
+                  dprint( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) );
                y = execute(x);
                oargs[i] = y;
-                  dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
+                  dprint( ("args[%d]: %s %f <%s>, t=%o\n",
                           i, y->nval, y->fval, isarr(y) ? "(array)" : y->sval, y->tval) );
                if (isfcn(y))
                        FATAL("can't use function %s as argument in %s", y->nval, s);
@@ -257,7 +266,8 @@ Cell *call(Node **a, int n) /* function call.  very kludgy and fragile */
                        args[i] = y;    /* arrays by ref */
                else
                        args[i] = copycell(y);
-               tempfree(y);
+                       if (istemp(y))
+                               tfree(y);
        }
        for ( ; i < ndef; i++) {        /* add null args for ones not provided */
                args[i] = gettemp();
@@ -268,7 +278,7 @@ Cell *call(Node **a, int n) /* function call.  very kludgy and fragile */
                int dfp = fp - frame;   /* old index */
                frame = (struct Frame *)
                        realloc((char *) frame, (nframe += 100) * sizeof(struct Frame));
-               if (frame == NULL)
+               if (frame == nil)
                        FATAL("out of space for stack frames in %s", s);
                fp = frame + dfp;
        }
@@ -277,9 +287,9 @@ Cell *call(Node **a, int n) /* function call.  very kludgy and fragile */
        fp->nargs = ndef;       /* number defined with (excess are locals) */
        fp->retval = gettemp();
 
-          dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
+       dprint( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) );
        y = execute((Node *)(fcn->sval));       /* execute body */
-          dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
+       dprint( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) );
 
        for (i = 0; i < ndef; i++) {
                Cell *t = fp->args[i];
@@ -288,25 +298,30 @@ Cell *call(Node **a, int n)       /* function call.  very kludgy and fragile */
                                if (i >= ncall) {
                                        freesymtab(t);
                                        t->csub = CTEMP;
-                                       tempfree(t);
+                               if (istemp(t))
+                                       tfree(t);
                                } else {
                                        oargs[i]->tval = t->tval;
                                        oargs[i]->tval &= ~(STR|NUM|DONTFREE);
                                        oargs[i]->sval = t->sval;
-                                       tempfree(t);
+                                       if (istemp(t))
+                                               tfree(t);
                                }
                        }
                } else if (t != y) {    /* kludge to prevent freeing twice */
                        t->csub = CTEMP;
-                       tempfree(t);
+                       if (istemp(t))
+                               tfree(t);
                }
        }
-       tempfree(fcn);
+       if (istemp(fcn))
+               tfree(fcn);
        if (isexit(y) || isnext(y) || isnextfile(y))
                return y;
-       tempfree(y);            /* this can free twice! */
+       if (istemp(y))
+               tfree(y);               /* this can free twice! */
        z = fp->retval;                 /* return value */
-          dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
+          dprint( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
        fp--;
        return(z);
 }
@@ -318,7 +333,7 @@ Cell *copycell(Cell *x)     /* make a copy of a cell in a temp */
        y = gettemp();
        y->csub = CCOPY;        /* prevents freeing until call is over */
        y->nval = x->nval;      /* BUG? */
-       y->sval = x->sval ? tostring(x->sval) : NULL;
+       y->sval = x->sval ? tostring(x->sval) : nil;
        y->fval = x->fval;
        y->tval = x->tval & ~(CON|FLD|REC|DONTFREE);    /* copy is not constant or field */
                                                        /* is DONTFREE right? */
@@ -329,7 +344,7 @@ Cell *arg(Node **a, int n)  /* nth argument of a function */
 {
 
        n = ptoi(a[0]); /* argument number, counting from 0 */
-          dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
+          dprint( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
        if (n+1 > fp->nargs)
                FATAL("argument #%d of function %s was not supplied",
                        n+1, fp->fcncell->nval);
@@ -342,14 +357,15 @@ Cell *jump(Node **a, int n)       /* break, continue, next, nextfile, return */
 
        switch (n) {
        case EXIT:
-               if (a[0] != NULL) {
+               if (a[0] != nil) {
                        y = execute(a[0]);
                        errorflag = (int) getfval(y);
-                       tempfree(y);
+                       if (istemp(y))
+                               tfree(y);
                }
                longjmp(env, 1);
        case RETURN:
-               if (a[0] != NULL) {
+               if (a[0] != nil) {
                        y = execute(a[0]);
                        if ((y->tval & (STR|NUM)) == (STR|NUM)) {
                                setsval(fp->retval, getsval(y));
@@ -362,7 +378,8 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
                                setfval(fp->retval, getfval(y));
                        else            /* can't happen */
                                FATAL("bad type variable %d", y->tval);
-                       tempfree(y);
+                       if (istemp(y))
+                               tfree(y);
                }
                return(jret);
        case NEXT:
@@ -384,33 +401,35 @@ Cell *getline(Node **a, int n)    /* get next line from specific input */
 {              /* a[0] is variable, a[1] is operator, a[2] is filename */
        Cell *r, *x;
        extern Cell **fldtab;
-       FILE *fp;
+       Biobuf *fp;
        char *buf;
        int bufsize = recsize;
        int mode;
 
-       if ((buf = (char *) malloc(bufsize)) == NULL)
+       if ((buf = (char *) malloc(bufsize)) == nil)
                FATAL("out of memory in getline");
 
-       fflush(stdout); /* in case someone is waiting for a prompt */
+       Bflush(&stdout);        /* in case someone is waiting for a prompt */
        r = gettemp();
-       if (a[1] != NULL) {             /* getline < file */
+       if (a[1] != nil) {              /* getline < file */
                x = execute(a[2]);              /* filename */
                mode = ptoi(a[1]);
                if (mode == '|')                /* input pipe */
                        mode = LE;      /* arbitrary flag */
                fp = openfile(mode, getsval(x));
-               tempfree(x);
-               if (fp == NULL)
+               if (istemp(x))
+                       tfree(x);
+               if (fp == nil)
                        n = -1;
                else
                        n = readrec(&buf, &bufsize, fp);
                if (n <= 0) {
                        ;
-               } else if (a[0] != NULL) {      /* getline var <file */
+               } else if (a[0] != nil) {       /* getline var <file */
                        x = execute(a[0]);
                        setsval(x, buf);
-                       tempfree(x);
+                       if (istemp(x))
+                               tfree(x);
                } else {                        /* getline <file */
                        setsval(fldtab[0], buf);
                        if (is_number(fldtab[0]->sval)) {
@@ -419,13 +438,14 @@ Cell *getline(Node **a, int n)    /* get next line from specific input */
                        }
                }
        } else {                        /* bare getline; use current input */
-               if (a[0] == NULL)       /* getline */
+               if (a[0] == nil)        /* getline */
                        n = getrec(&record, &recsize, 1);
                else {                  /* getline var */
                        n = getrec(&buf, &bufsize, 0);
                        x = execute(a[0]);
                        setsval(x, buf);
-                       tempfree(x);
+                       if (istemp(x))
+                               tfree(x);
                }
        }
        setfval(r, (Awkfloat) n);
@@ -433,14 +453,14 @@ Cell *getline(Node **a, int n)    /* get next line from specific input */
        return r;
 }
 
-Cell *getnf(Node **a, int n)   /* get NF */
+Cell *getnf(Node **a, int)     /* get NF */
 {
        if (donefld == 0)
                fldbld();
        return (Cell *) a[0];
 }
 
-Cell *array(Node **a, int n)   /* a[0] is symtab, a[1] is list of subscripts */
+Cell *array(Node **a, int)     /* a[0] is symtab, a[1] is list of subscripts */
 {
        Cell *x, *y, *z;
        char *s;
@@ -449,7 +469,7 @@ Cell *array(Node **a, int n)        /* a[0] is symtab, a[1] is list of subscripts */
        int bufsz = recsize;
        int nsub = strlen(*SUBSEP);
 
-       if ((buf = (char *) malloc(bufsz)) == NULL)
+       if ((buf = (char *) malloc(bufsz)) == nil)
                FATAL("out of memory in array");
 
        x = execute(a[0]);      /* Cell* for symbol table */
@@ -462,10 +482,11 @@ Cell *array(Node **a, int n)      /* a[0] is symtab, a[1] is list of subscripts */
                strcat(buf, s);
                if (np->nnext)
                        strcat(buf, *SUBSEP);
-               tempfree(y);
+               if (istemp(y))
+                       tfree(y);
        }
        if (!isarr(x)) {
-                  dprintf( ("making %s into an array\n", x->nval) );
+                  dprint( ("making %s into an array\n", x->nval) );
                if (freeable(x))
                        xfree(x->sval);
                x->tval &= ~(STR|NUM|DONTFREE);
@@ -475,12 +496,13 @@ Cell *array(Node **a, int n)      /* a[0] is symtab, a[1] is list of subscripts */
        z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
        z->ctype = OCELL;
        z->csub = CVAR;
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        free(buf);
        return(z);
 }
 
-Cell *awkdelete(Node **a, int n)       /* a[0] is symtab, a[1] is list of subscripts */
+Cell *awkdelete(Node **a, int) /* a[0] is symtab, a[1] is list of subscripts */
 {
        Cell *x, *y;
        Node *np;
@@ -498,7 +520,7 @@ Cell *awkdelete(Node **a, int n)    /* a[0] is symtab, a[1] is list of subscripts *
        } else {
                int bufsz = recsize;
                char *buf;
-               if ((buf = (char *) malloc(bufsz)) == NULL)
+               if ((buf = (char *) malloc(bufsz)) == nil)
                        FATAL("out of memory in adelete");
                buf[0] = 0;
                for (np = a[1]; np; np = np->nnext) {
@@ -509,16 +531,18 @@ Cell *awkdelete(Node **a, int n)  /* a[0] is symtab, a[1] is list of subscripts *
                        strcat(buf, s); 
                        if (np->nnext)
                                strcat(buf, *SUBSEP);
-                       tempfree(y);
+                       if (istemp(y))
+                               tfree(y);
                }
                freeelem(x, buf);
                free(buf);
        }
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        return True;
 }
 
-Cell *intest(Node **a, int n)  /* a[0] is index (list), a[1] is symtab */
+Cell *intest(Node **a, int)    /* a[0] is index (list), a[1] is symtab */
 {
        Cell *x, *ap, *k;
        Node *p;
@@ -529,14 +553,14 @@ Cell *intest(Node **a, int n)     /* a[0] is index (list), a[1] is symtab */
 
        ap = execute(a[1]);     /* array name */
        if (!isarr(ap)) {
-                  dprintf( ("making %s into an array\n", ap->nval) );
+                  dprint( ("making %s into an array\n", ap->nval) );
                if (freeable(ap))
                        xfree(ap->sval);
                ap->tval &= ~(STR|NUM|DONTFREE);
                ap->tval |= ARR;
                ap->sval = (char *) makesymtab(NSYMTAB);
        }
-       if ((buf = (char *) malloc(bufsz)) == NULL) {
+       if ((buf = (char *) malloc(bufsz)) == nil) {
                FATAL("out of memory in intest");
        }
        buf[0] = 0;
@@ -546,14 +570,16 @@ Cell *intest(Node **a, int n)     /* a[0] is index (list), a[1] is symtab */
                if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, 0))
                        FATAL("out of memory deleting %s[%s...]", x->nval, buf);
                strcat(buf, s);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                if (p->nnext)
                        strcat(buf, *SUBSEP);
        }
        k = lookup(buf, (Array *) ap->sval);
-       tempfree(ap);
+       if (istemp(ap))
+               tfree(ap);
        free(buf);
-       if (k == NULL)
+       if (k == nil)
                return(False);
        else
                return(True);
@@ -575,19 +601,21 @@ Cell *matchop(Node **a, int n)    /* ~ and match() */
                y = execute(a[2]);      /* a[2] = regular expr */
                t = getsval(y);
                p = compre(t);
-               tempfree(y);
+               if (istemp(y))
+                       tfree(y);
        }
        if (n == MATCHFCN)
                i = pmatch(p, s, s);
        else
                i = match(p, s, s);
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        if (n == MATCHFCN) {
-               int start = countposn(s, patbeg-s)+1;
+               int start = utfnlen(s, patbeg-s)+1;
                if (patlen < 0)
                        start = 0;
                setfval(rstartloc, (Awkfloat) start);
-               setfval(rlengthloc, (Awkfloat) countposn(patbeg, patlen));
+               setfval(rlengthloc, (Awkfloat) utfnlen(patbeg, patlen));
                x = gettemp();
                x->tval = NUM;
                x->fval = start;
@@ -606,20 +634,23 @@ Cell *boolop(Node **a, int n)     /* a[0] || a[1], a[0] && a[1], !a[0] */
 
        x = execute(a[0]);
        i = istrue(x);
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        switch (n) {
        case BOR:
                if (i) return(True);
                y = execute(a[1]);
                i = istrue(y);
-               tempfree(y);
+               if (istemp(y))
+                       tfree(y);
                if (i) return(True);
                else return(False);
        case AND:
                if ( !i ) return(False);
                y = execute(a[1]);
                i = istrue(y);
-               tempfree(y);
+               if (istemp(y))
+                       tfree(y);
                if (i) return(True);
                else return(False);
        case NOT:
@@ -645,8 +676,10 @@ Cell *relop(Node **a, int n)       /* a[0 < a[1], etc. */
        } else {
                i = strcmp(getsval(x), getsval(y));
        }
-       tempfree(x);
-       tempfree(y);
+       if (istemp(x))
+               tfree(x);
+       if (istemp(y))
+               tfree(y);
        switch (n) {
        case LT:        if (i<0) return(True);
                        else return(False);
@@ -669,7 +702,7 @@ Cell *relop(Node **a, int n)        /* a[0 < a[1], etc. */
 void tfree(Cell *a)    /* free a tempcell */
 {
        if (freeable(a)) {
-                  dprintf( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) );
+                  dprint( ("freeing %s %s %o\n", a->nval, a->sval, a->tval) );
                xfree(a->sval);
        }
        if (a == tmps)
@@ -696,7 +729,7 @@ Cell *gettemp(void) /* get a tempcell */
        return(x);
 }
 
-Cell *indirect(Node **a, int n)        /* $( a[0] ) */
+Cell *indirect(Node **a, int)  /* $( a[0] ) */
 {
        Cell *x;
        int m;
@@ -707,16 +740,18 @@ Cell *indirect(Node **a, int n)   /* $( a[0] ) */
        if (m == 0 && !is_number(s = getsval(x)))       /* suspicion! */
                FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
                /* BUG: can x->nval ever be null??? */
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        x = fieldadr(m);
        x->ctype = OCELL;       /* BUG?  why are these needed? */
        x->csub = CFLD;
        return(x);
 }
 
-Cell *substr(Node **a, int nnn)                /* substr(a[0], a[1], a[2]) */
+Cell *substr(Node **a, int)            /* substr(a[0], a[1], a[2]) */
 {
        int k, m, n;
+       Rune r;
        char *s, *p;
        int temp;
        Cell *x, *y, *z = 0;
@@ -726,12 +761,16 @@ Cell *substr(Node **a, int nnn)           /* substr(a[0], a[1], a[2]) */
        if (a[2] != 0)
                z = execute(a[2]);
        s = getsval(x);
-       k = countposn(s, strlen(s)) + 1;
+       k = utfnlen(s, strlen(s)) + 1;
        if (k <= 1) {
-               tempfree(x);
-               tempfree(y);
-               if (a[2] != 0)
-                       tempfree(z);
+               if (istemp(x))
+                       tfree(x);
+               if (istemp(y))
+                       tfree(y);
+               if (a[2] != 0) {
+                       if (istemp(z))
+                               tfree(z);
+               }
                x = gettemp();
                setsval(x, "");
                return(x);
@@ -741,31 +780,34 @@ Cell *substr(Node **a, int nnn)           /* substr(a[0], a[1], a[2]) */
                m = 1;
        else if (m > k)
                m = k;
-       tempfree(y);
+       if (istemp(y))
+               tfree(y);
        if (a[2] != 0) {
                n = (int) getfval(z);
-               tempfree(z);
+               if (istemp(z))
+                       tfree(z);
        } else
                n = k - 1;
        if (n < 0)
                n = 0;
        else if (n > k - m)
                n = k - m;
-          dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
+       dprint( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
        y = gettemp();
        while (*s && --m)
-                s += mblen(s, k);
-       for (p = s; *p && n--; p += mblen(p, k))
+               s += chartorune(&r, s);
+       for (p = s; *p && n--; p += chartorune(&r, p))
                        ;
        temp = *p;      /* with thanks to John Linderman */
        *p = '\0';
        setsval(y, s);
        *p = temp;
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        return(y);
 }
 
-Cell *sindex(Node **a, int nnn)                /* index(a[0], a[1]) */
+Cell *sindex(Node **a, int)            /* index(a[0], a[1]) */
 {
        Cell *x, *y, *z;
        char *s1, *s2, *p1, *p2, *q;
@@ -781,12 +823,14 @@ Cell *sindex(Node **a, int nnn)           /* index(a[0], a[1]) */
                for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
                        ;
                if (*p2 == '\0') {
-                       v = (Awkfloat) countposn(s1, p1-s1) + 1;        /* origin 1 */
+                       v = (Awkfloat) utfnlen(s1, p1-s1) + 1;  /* origin 1 */
                        break;
                }
        }
-       tempfree(x);
-       tempfree(y);
+       if (istemp(x))
+               tfree(x);
+       if (istemp(y))
+               tfree(y);
        setfval(z, v);
        return(z);
 }
@@ -798,7 +842,7 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)    /* printf-like conversi
        char *fmt;
        char *p, *t, *os;
        Cell *x;
-       int flag = 0, n;
+       int flag, n;
        int fmtwd; /* format width */
        int fmtsz = recsize;
        char *buf = *pbuf;
@@ -806,7 +850,7 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)    /* printf-like conversi
 
        os = s;
        p = buf;
-       if ((fmt = (char *) malloc(fmtsz)) == NULL)
+       if ((fmt = (char *) malloc(fmtsz)) == nil)
                FATAL("out of memory in format()");
        while (*s) {
                adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format");
@@ -832,12 +876,13 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)  /* printf-like conversi
                        if (*s == '*') {
                                x = execute(a);
                                a = a->nnext;
-                               sprintf(t-1, "%d", fmtwd=(int) getfval(x));
+                               sprint(t-1, "%d", fmtwd=(int) getfval(x));
                                if (fmtwd < 0)
                                        fmtwd = -fmtwd;
                                adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
                                t = fmt + strlen(fmt);
-                               tempfree(x);
+                               if (istemp(x))
+                                       tfree(x);
                        }
                }
                *t = '\0';
@@ -856,7 +901,13 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)   /* printf-like conversi
                        *t = 'd';
                        *++t = '\0';
                        break;
-               case 'o': case 'x': case 'X': case 'u':
+               case 'u':
+                       flag = *(s-1) == 'l' ? 2 : 3;
+                       *t++ = 'u';
+                       *t++ = 'd';
+                       *t = '\0';
+                       break;                          
+               case 'o': case 'x': case 'X':
                        flag = *(s-1) == 'l' ? 2 : 3;
                        break;
                case 's':
@@ -870,7 +921,7 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)    /* printf-like conversi
                        flag = 0;
                        break;
                }
-               if (a == NULL)
+               if (a == nil)
                        FATAL("not enough args in printf(%s)", os);
                x = execute(a);
                a = a->nnext;
@@ -879,18 +930,18 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)  /* printf-like conversi
                        n = fmtwd;
                adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format");
                switch (flag) {
-               case 0: sprintf(p, "%s", fmt);  /* unknown, so dump it too */
+               case 0: sprint(p, "%s", fmt);   /* unknown, so dump it too */
                        t = getsval(x);
                        n = strlen(t);
                        if (fmtwd > n)
                                n = fmtwd;
                        adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format");
                        p += strlen(p);
-                       sprintf(p, "%s", t);
+                       sprint(p, "%s", t);
                        break;
-               case 1: sprintf(p, fmt, getfval(x)); break;
-               case 2: sprintf(p, fmt, (long) getfval(x)); break;
-               case 3: sprintf(p, fmt, (int) getfval(x)); break;
+               case 1: sprint(p, fmt, getfval(x)); break;
+               case 2: sprint(p, fmt, (long) getfval(x)); break;
+               case 3: sprint(p, fmt, (int) getfval(x)); break;
                case 4:
                        t = getsval(x);
                        n = strlen(t);
@@ -898,21 +949,26 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)  /* printf-like conversi
                                n = fmtwd;
                        if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, 0))
                                FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
-                       sprintf(p, fmt, t);
+                       sprint(p, fmt, t);
                        break;
                case 5:
                        if (isnum(x)) {
-                               if (getfval(x))
-                                       sprintf(p, fmt, (int) getfval(x));
-                               else{
+                               if (getfval(x)) {
+                                       *p++ = (uchar)getfval(x);
+                                       *p = '\0';
+                               } else {
                                        *p++ = '\0';
                                        *p = '\0';
                                }
-                       } else
-                               sprintf(p, fmt, getsval(x)[0]);
+                       } else {
+                               if((*p = getsval(x)[0]) != '\0')
+                                       p++;
+                               *p = '\0';
+                       }
                        break;
                }
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                p += strlen(p);
                s++;
        }
@@ -925,55 +981,56 @@ int format(char **pbuf, int *pbufsize, char *s, Node *a)  /* printf-like conversi
        return p - buf;
 }
 
-Cell *awksprintf(Node **a, int n)              /* sprintf(a[0]) */
+Cell *awksprintf(Node **a, int)                /* sprint(a[0]) */
 {
        Cell *x;
        Node *y;
        char *buf;
        int bufsz=3*recsize;
 
-       if ((buf = (char *) malloc(bufsz)) == NULL)
-               FATAL("out of memory in awksprintf");
+       if ((buf = (char *) malloc(bufsz)) == nil)
+               FATAL("out of memory in awksprint");
        y = a[0]->nnext;
        x = execute(a[0]);
        if (format(&buf, &bufsz, getsval(x), y) == -1)
-               FATAL("sprintf string %.30s... too long.  can't happen.", buf);
-       tempfree(x);
+               FATAL("sprint string %.30s... too long.  can't happen.", buf);
+       if (istemp(x))
+               tfree(x);
        x = gettemp();
        x->sval = buf;
        x->tval = STR;
        return(x);
 }
 
-Cell *awkprintf(Node **a, int n)               /* printf */
+Cell *awkprintf(Node **a, int)         /* printf */
 {      /* a[0] is list of args, starting with format string */
        /* a[1] is redirection operator, a[2] is redirection file */
-       FILE *fp;
+       Biobuf *fp;
        Cell *x;
        Node *y;
        char *buf;
        int len;
        int bufsz=3*recsize;
 
-       if ((buf = (char *) malloc(bufsz)) == NULL)
+       if ((buf = (char *) malloc(bufsz)) == nil)
                FATAL("out of memory in awkprintf");
        y = a[0]->nnext;
        x = execute(a[0]);
        if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
                FATAL("printf string %.30s... too long.  can't happen.", buf);
-       tempfree(x);
-       if (a[1] == NULL) {
+       if (istemp(x))
+               tfree(x);
+       if (a[1] == nil) {
                /* fputs(buf, stdout); */
-               fwrite(buf, len, 1, stdout);
-               if (ferror(stdout))
+               if (Bwrite(&stdout, buf, len) < 0)
                        FATAL("write error on stdout");
+               Bflush(&stdout);
        } else {
                fp = redirect(ptoi(a[1]), a[2]);
                /* fputs(buf, fp); */
-               fwrite(buf, len, 1, fp);
-               fflush(fp);
-               if (ferror(fp))
+               if(Bwrite(fp, buf, len) < 0)
                        FATAL("write error on %s", filename(fp));
+               Bflush(fp);
        }
        free(buf);
        return(True);
@@ -987,11 +1044,13 @@ Cell *arith(Node **a, int n)     /* a[0] + a[1], etc.  also -a[0] */
 
        x = execute(a[0]);
        i = getfval(x);
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        if (n != UMINUS) {
                y = execute(a[1]);
                j = getfval(y);
-               tempfree(y);
+               if (istemp(y))
+                       tfree(y);
        }
        z = gettemp();
        switch (n) {
@@ -1060,7 +1119,8 @@ Cell *incrdecr(Node **a, int n)           /* a[0]++, etc. */
        z = gettemp();
        setfval(z, xf);
        setfval(x, xf + k);
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        return(z);
 }
 
@@ -1074,8 +1134,8 @@ Cell *assign(Node **a, int n)     /* a[0] = a[1], a[0] += a[1], etc. */
        x = execute(a[0]);
        if (n == ASSIGN) {      /* ordinary assignment */
                if (x == y && !(x->tval & (FLD|REC)))   /* self-assignment: */
-                                     /* leave alone unless it's a field */
-               else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
+                       goto Free;              /* leave alone unless it's a field */
+               if ((y->tval & (STR|NUM)) == (STR|NUM)) {
                        setsval(x, getsval(y));
                        x->fval = getfval(y);
                        x->tval |= NUM;
@@ -1086,7 +1146,9 @@ Cell *assign(Node **a, int n)     /* a[0] = a[1], a[0] += a[1], etc. */
                        setfval(x, getfval(y));
                else
                        funnyvar(y, "read value of");
-               tempfree(y);
+Free:
+               if (istemp(y))
+                       tfree(y);
                return(x);
        }
        xf = getfval(x);
@@ -1122,12 +1184,13 @@ Cell *assign(Node **a, int n)   /* a[0] = a[1], a[0] += a[1], etc. */
                FATAL("illegal assignment operator %d", n);
                break;
        }
-       tempfree(y);
+       if (istemp(y))
+               tfree(y);
        setfval(x, xf);
        return(x);
 }
 
-Cell *cat(Node **a, int q)     /* a[0] cat a[1] */
+Cell *cat(Node **a, int)       /* a[0] cat a[1] */
 {
        Cell *x, *y, *z;
        int n1, n2;
@@ -1140,20 +1203,22 @@ Cell *cat(Node **a, int q)      /* a[0] cat a[1] */
        n1 = strlen(x->sval);
        n2 = strlen(y->sval);
        s = (char *) malloc(n1 + n2 + 1);
-       if (s == NULL)
+       if (s == nil)
                FATAL("out of space concatenating %.15s... and %.15s...",
                        x->sval, y->sval);
        strcpy(s, x->sval);
        strcpy(s+n1, y->sval);
-       tempfree(y);
+       if (istemp(y))
+               tfree(y);
        z = gettemp();
        z->sval = s;
        z->tval = STR;
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        return(z);
 }
 
-Cell *pastat(Node **a, int n)  /* a[0] { a[1] } */
+Cell *pastat(Node **a, int)    /* a[0] { a[1] } */
 {
        Cell *x;
 
@@ -1162,14 +1227,15 @@ Cell *pastat(Node **a, int n)   /* a[0] { a[1] } */
        else {
                x = execute(a[0]);
                if (istrue(x)) {
-                       tempfree(x);
+                       if (istemp(x))
+                               tfree(x);
                        x = execute(a[1]);
                }
        }
        return x;
 }
 
-Cell *dopa2(Node **a, int n)   /* a[0], a[1] { a[2] } */
+Cell *dopa2(Node **a, int)     /* a[0], a[1] { a[2] } */
 {
        Cell *x;
        int pair;
@@ -1179,25 +1245,27 @@ Cell *dopa2(Node **a, int n)    /* a[0], a[1] { a[2] } */
                x = execute(a[0]);
                if (istrue(x))
                        pairstack[pair] = 1;
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
        }
        if (pairstack[pair] == 1) {
                x = execute(a[1]);
                if (istrue(x))
                        pairstack[pair] = 0;
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[2]);
                return(x);
        }
        return(False);
 }
 
-Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
+Cell *split(Node **a, int)     /* split(a[0], a[1], a[2]); a[3] is type */
 {
        Cell *x = 0, *y, *ap;
        char *s, *t, *fs = 0;
        char temp, num[50];
-       int n, nb, sep, tempstat, arg3type;
+       int n, nb, sep, arg3type;
 
        y = execute(a[0]);      /* source string */
        s = getsval(y);
@@ -1217,7 +1285,7 @@ Cell *split(Node **a, int nnn)    /* split(a[0], a[1], a[2]); a[3] is type */
        y->tval |= DONTFREE;    /* split(a[x], a); */
        freesymtab(ap);
        y->tval = n;
-          dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) );
+          dprint( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) );
        ap->tval &= ~STR;
        ap->tval |= ARR;
        ap->sval = (char *) makesymtab(NSYMTAB);
@@ -1234,7 +1302,7 @@ Cell *split(Node **a, int nnn)    /* split(a[0], a[1], a[2]); a[3] is type */
                if (nematch(p,s,t)) {
                        do {
                                n++;
-                               sprintf(num, "%d", n);
+                               sprint(num, "%d", n);
                                temp = *patbeg;
                                *patbeg = '\0';
                                if (is_number(t))
@@ -1245,20 +1313,21 @@ Cell *split(Node **a, int nnn)  /* split(a[0], a[1], a[2]); a[3] is type */
                                t = patbeg + patlen;
                                if (t[-1] == 0 || *t == 0) {
                                        n++;
-                                       sprintf(num, "%d", n);
+                                       sprint(num, "%d", n);
                                        setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
                                        goto spdone;
                                }
                        } while (nematch(p,s,t));
                }
                n++;
-               sprintf(num, "%d", n);
+               sprint(num, "%d", n);
                if (is_number(t))
                        setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
                else
                        setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
   spdone:
-               p = NULL;
+               p = nil;
+               USED(p);
        } else if (sep == ' ') {
                for (n = 0; ; ) {
                        while (*s == ' ' || *s == '\t' || *s == '\n')
@@ -1272,7 +1341,7 @@ Cell *split(Node **a, int nnn)    /* split(a[0], a[1], a[2]); a[3] is type */
                        while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
                        temp = *s;
                        *s = '\0';
-                       sprintf(num, "%d", n);
+                       sprint(num, "%d", n);
                        if (is_number(t))
                                setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
                        else
@@ -1287,7 +1356,7 @@ Cell *split(Node **a, int nnn)    /* split(a[0], a[1], a[2]); a[3] is type */
                        char buf[UTFmax+1];
 
                        n++;
-                       snprintf(num, sizeof num, "%d", n);
+                       snprint(num, sizeof num, "%d", n);
                        nb = chartorune(&r, s);
                        memmove(buf, s, nb);
                        buf[nb] = '\0';
@@ -1304,7 +1373,7 @@ Cell *split(Node **a, int nnn)    /* split(a[0], a[1], a[2]); a[3] is type */
                                s++;
                        temp = *s;
                        *s = '\0';
-                       sprintf(num, "%d", n);
+                       sprint(num, "%d", n);
                        if (is_number(t))
                                setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
                        else
@@ -1314,47 +1383,54 @@ Cell *split(Node **a, int nnn)  /* split(a[0], a[1], a[2]); a[3] is type */
                                break;
                }
        }
-       tempfree(ap);
-       tempfree(y);
+       if (istemp(ap))
+               tfree(ap);
+       if (istemp(y))
+               tfree(y);
        if (a[2] != 0 && arg3type == STRING)
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
        x = gettemp();
        x->tval = NUM;
        x->fval = n;
        return(x);
 }
 
-Cell *condexpr(Node **a, int n)        /* a[0] ? a[1] : a[2] */
+Cell *condexpr(Node **a, int)  /* a[0] ? a[1] : a[2] */
 {
        Cell *x;
 
        x = execute(a[0]);
        if (istrue(x)) {
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[1]);
        } else {
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[2]);
        }
        return(x);
 }
 
-Cell *ifstat(Node **a, int n)  /* if (a[0]) a[1]; else a[2] */
+Cell *ifstat(Node **a, int)    /* if (a[0]) a[1]; else a[2] */
 {
        Cell *x;
 
        x = execute(a[0]);
        if (istrue(x)) {
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[1]);
        } else if (a[2] != 0) {
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[2]);
        }
        return(x);
 }
 
-Cell *whilestat(Node **a, int n)       /* while (a[0]) a[1] */
+Cell *whilestat(Node **a, int) /* while (a[0]) a[1] */
 {
        Cell *x;
 
@@ -1362,7 +1438,8 @@ Cell *whilestat(Node **a, int n)  /* while (a[0]) a[1] */
                x = execute(a[0]);
                if (!istrue(x))
                        return(x);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[1]);
                if (isbreak(x)) {
                        x = True;
@@ -1370,11 +1447,12 @@ Cell *whilestat(Node **a, int n)        /* while (a[0]) a[1] */
                }
                if (isnext(x) || isexit(x) || isret(x))
                        return(x);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
        }
 }
 
-Cell *dostat(Node **a, int n)  /* do a[0]; while(a[1]) */
+Cell *dostat(Node **a, int)    /* do a[0]; while(a[1]) */
 {
        Cell *x;
 
@@ -1384,38 +1462,45 @@ Cell *dostat(Node **a, int n)   /* do a[0]; while(a[1]) */
                        return True;
                if (isnext(x) || isnextfile(x) || isexit(x) || isret(x))
                        return(x);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[1]);
                if (!istrue(x))
                        return(x);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
        }
 }
 
-Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
+Cell *forstat(Node **a, int)   /* for (a[0]; a[1]; a[2]) a[3] */
 {
        Cell *x;
 
        x = execute(a[0]);
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        for (;;) {
                if (a[1]!=0) {
                        x = execute(a[1]);
-                       if (!istrue(x)) return(x);
-                       else tempfree(x);
+                       if (!istrue(x))
+                               return(x);
+                       else if (istemp(x))
+                               tfree(x);
                }
                x = execute(a[3]);
                if (isbreak(x))         /* turn off break */
                        return True;
                if (isnext(x) || isexit(x) || isret(x))
                        return(x);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = execute(a[2]);
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
        }
 }
 
-Cell *instat(Node **a, int n)  /* for (a[0] in a[1]) a[2] */
+Cell *instat(Node **a, int)    /* for (a[0] in a[1]) a[2] */
 {
        Cell *x, *vp, *arrayp, *cp, *ncp;
        Array *tp;
@@ -1427,36 +1512,40 @@ Cell *instat(Node **a, int n)   /* for (a[0] in a[1]) a[2] */
                return True;
        }
        tp = (Array *) arrayp->sval;
-       tempfree(arrayp);
+       if (istemp(arrayp))
+               tfree(arrayp);
        for (i = 0; i < tp->size; i++) {        /* this routine knows too much */
-               for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
+               for (cp = tp->tab[i]; cp != nil; cp = ncp) {
                        setsval(vp, cp->nval);
                        ncp = cp->cnext;
                        x = execute(a[2]);
                        if (isbreak(x)) {
-                               tempfree(vp);
+                               if (istemp(vp))
+                                       tfree(vp);
                                return True;
                        }
                        if (isnext(x) || isexit(x) || isret(x)) {
-                               tempfree(vp);
+                               if (istemp(vp))
+                                       tfree(vp);
                                return(x);
                        }
-                       tempfree(x);
+                       if (istemp(x))
+                               tfree(x);
                }
        }
        return True;
 }
 
-Cell *bltin(Node **a, int n)   /* builtin functions. a[0] is type, a[1] is arg list */
+Cell *bltin(Node **a, int)     /* builtin functions. a[0] is type, a[1] is arg list */
 {
        Cell *x, *y;
        Awkfloat u;
        int t;
-       wchar_t wc;
+       Rune wc;
        char *p, *buf;
        char mbc[50];
        Node *nextarg;
-       FILE *fp;
+       Biobuf *fp;
        void flush_all(void);
 
        t = ptoi(a[0]);
@@ -1465,10 +1554,10 @@ Cell *bltin(Node **a, int n)    /* builtin functions. a[0] is type, a[1] is arg lis
        switch (t) {
        case FLENGTH:
                if (isarr(x))
-                       u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
+                       u = ((Array *) x->sval)->nelemt;        /* GROT. should be function*/
                else {
                        p = getsval(x);
-                       u = (Awkfloat) countposn(p, strlen(p));
+                       u = (Awkfloat) utfnlen(p, strlen(p));
                }
                break;
        case FLOG:
@@ -1490,12 +1579,13 @@ Cell *bltin(Node **a, int n)    /* builtin functions. a[0] is type, a[1] is arg lis
                } else {
                        y = execute(a[1]->nnext);
                        u = atan2(getfval(x), getfval(y));
-                       tempfree(y);
+                       if (istemp(y))
+                               tfree(y);
                        nextarg = nextarg->nnext;
                }
                break;
        case FSYSTEM:
-               fflush(stdout);         /* in case something is buffered already */
+               Bflush(&stdout);                /* in case something is buffered already */
                u = (Awkfloat) system(getsval(x)) / 256;   /* 256 is unix-dep */
                break;
        case FRAND:
@@ -1504,7 +1594,7 @@ Cell *bltin(Node **a, int n)      /* builtin functions. a[0] is type, a[1] is arg lis
                break;
        case FSRAND:
                if (isrec(x))   /* no argument provided */
-                       u = time((time_t *)0);
+                       u = time(nil);
                else
                        u = getfval(x);
                srand((unsigned int) u);
@@ -1521,7 +1611,8 @@ Cell *bltin(Node **a, int n)      /* builtin functions. a[0] is type, a[1] is arg lis
                                if (isupper(*p))
                                        *p = tolower(*p);
                }
-               tempfree(x);
+               if (istemp(x))
+                       tfree(x);
                x = gettemp();
                setsval(x, buf);
                free(buf);
@@ -1530,15 +1621,16 @@ Cell *bltin(Node **a, int n)    /* builtin functions. a[0] is type, a[1] is arg lis
                if (isrec(x) || strlen(getsval(x)) == 0) {
                        flush_all();    /* fflush() or fflush("") -> all */
                        u = 0;
-               } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
-                       u = EOF;
+               } else if ((fp = openfile(FFLUSH, getsval(x))) == nil)
+                       u = Beof;
                else
-                       u = fflush(fp);
+                       u = Bflush(fp);
                break;
        case FUTF:
                wc = (int)getfval(x);
-               mbc[wctomb(mbc, wc)] = 0;
-               tempfree(x);
+               mbc[runetochar(mbc, &wc)] = 0;
+               if (istemp(x))
+                       tfree(x);
                x = gettemp();
                setsval(x, mbc);
                return x;
@@ -1546,7 +1638,8 @@ Cell *bltin(Node **a, int n)      /* builtin functions. a[0] is type, a[1] is arg lis
                FATAL("illegal function type %d", t);
                break;
        }
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        x = gettemp();
        setfval(x, u);
        if (nextarg != 0) {
@@ -1557,79 +1650,78 @@ Cell *bltin(Node **a, int n)    /* builtin functions. a[0] is type, a[1] is arg lis
        return(x);
 }
 
-Cell *printstat(Node **a, int n)       /* print a[0] */
+Cell *printstat(Node **a, int) /* print a[0] */
 {
        int r;
        Node *x;
        Cell *y;
-       FILE *fp;
+       Biobuf *fp;
 
        if (a[1] == 0)  /* a[1] is redirection operator, a[2] is file */
-               fp = stdout;
+               fp = &stdout;
        else
                fp = redirect(ptoi(a[1]), a[2]);
-       for (x = a[0]; x != NULL; x = x->nnext) {
+       for (x = a[0]; x != nil; x = x->nnext) {
                y = execute(x);
-               fputs(getsval(y), fp);
-               tempfree(y);
-               if (x->nnext == NULL)
-                       r = fputs(*ORS, fp);
+               Bwrite(fp, getsval(y), strlen(getsval(y)));
+               if (istemp(y))
+                       tfree(y);
+               if (x->nnext == nil)
+                       r = Bprint(fp, "%s", *ORS);
                else
-                       r = fputs(*OFS, fp);
-               if (r == EOF)
+                       r = Bprint(fp, "%s", *OFS);
+               if (r < 0)
                        FATAL("write error on %s", filename(fp));
        }
-       if (a[1] != 0)
-               if (fflush(fp) == EOF)
-                       FATAL("write error on %s", filename(fp));
+       if (Bflush(fp) < 0)
+               FATAL("write error on %s", filename(fp));
        return(True);
 }
 
-Cell *nullproc(Node **a, int n)
+Cell *nullproc(Node **, int)
 {
-       n = n;
-       a = a;
        return 0;
 }
 
 
-FILE *redirect(int a, Node *b) /* set up all i/o redirections */
+Biobuf *redirect(int a, Node *b)       /* set up all i/o redirections */
 {
-       FILE *fp;
+       Biobuf *fp;
        Cell *x;
        char *fname;
 
        x = execute(b);
        fname = getsval(x);
        fp = openfile(a, fname);
-       if (fp == NULL)
+       if (fp == nil)
                FATAL("can't open file %s", fname);
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        return fp;
 }
 
 struct files {
-       FILE    *fp;
+       Biobuf  *fp;
        char    *fname;
        int     mode;   /* '|', 'a', 'w' => LE/LT, GT */
 } files[FOPEN_MAX] ={
-       { NULL,  "/dev/stdin",  LT },   /* watch out: don't free this! */
-       { NULL, "/dev/stdout", GT },
-       { NULL, "/dev/stderr", GT }
+       { nil,  "/dev/stdin",  LT },    /* watch out: don't free this! */
+       { nil, "/dev/stdout", GT },
+       { nil, "/dev/stderr", GT }
 };
 
 void stdinit(void)     /* in case stdin, etc., are not constants */
 {
-       files[0].fp = stdin;
-       files[1].fp = stdout;
-       files[2].fp = stderr;
+       files[0].fp = &stdin;
+       files[1].fp = &stdout;
+       files[2].fp = &stderr;
 }
 
-FILE *openfile(int a, char *us)
+Biobuf *openfile(int a, char *us)
 {
        char *s = us;
        int i, m;
-       FILE *fp = 0;
+       Biobuf *fp = nil;
 
        if (*s == '\0')
                FATAL("null file name in print or getline");
@@ -1641,29 +1733,30 @@ FILE *openfile(int a, char *us)
                                return files[i].fp;
                }
        if (a == FFLUSH)        /* didn't find it, so don't create it! */
-               return NULL;
+               return nil;
 
        for (i=0; i < FOPEN_MAX; i++)
                if (files[i].fp == 0)
                        break;
        if (i >= FOPEN_MAX)
                FATAL("%s makes too many open files", s);
-       fflush(stdout); /* force a semblance of order */
+       Bflush(&stdout);        /* force a semblance of order */
        m = a;
        if (a == GT) {
-               fp = fopen(s, "w");
+               fp = Bopen(s, OWRITE);
        } else if (a == APPEND) {
-               fp = fopen(s, "a");
+               fp = Bopen(s, OWRITE);
+               Bseek(fp, 0LL, 2);
                m = GT; /* so can mix > and >> */
        } else if (a == '|') {  /* output pipe */
-               fp = popen(s, "w");
+               fp = popen(s, OWRITE);
        } else if (a == LE) {   /* input pipe */
-               fp = popen(s, "r");
+               fp = popen(s, OREAD);
        } else if (a == LT) {   /* getline <file */
-               fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");       /* "-" is stdin */
+               fp = strcmp(s, "-") == 0 ? &stdin : Bopen(s, OREAD);    /* "-" is stdin */
        } else  /* can't happen */
                FATAL("illegal redirection %d", a);
-       if (fp != NULL) {
+       if (fp != nil) {
                files[i].fname = tostring(s);
                files[i].fp = fp;
                files[i].mode = m;
@@ -1671,7 +1764,7 @@ FILE *openfile(int a, char *us)
        return fp;
 }
 
-char *filename(FILE *fp)
+char *filename(Biobuf *fp)
 {
        int i;
 
@@ -1681,30 +1774,28 @@ char *filename(FILE *fp)
        return "???";
 }
 
-Cell *closefile(Node **a, int n)
+Cell *closefile(Node **a, int)
 {
        Cell *x;
        int i, stat;
 
-       n = n;
        x = execute(a[0]);
        getsval(x);
        for (i = 0; i < FOPEN_MAX; i++)
                if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
-                       if (ferror(files[i].fp))
-                               WARNING( "i/o error occurred on %s", files[i].fname );
                        if (files[i].mode == '|' || files[i].mode == LE)
                                stat = pclose(files[i].fp);
                        else
-                               stat = fclose(files[i].fp);
-                       if (stat == EOF)
+                               stat = Bterm(files[i].fp);
+                       if (stat == Beof)
                                WARNING( "i/o error occurred closing %s", files[i].fname );
                        if (i > 2)      /* don't do /dev/std... */
                                xfree(files[i].fname);
-                       files[i].fname = NULL;  /* watch out for ref thru this */
-                       files[i].fp = NULL;
+                       files[i].fname = nil;   /* watch out for ref thru this */
+                       files[i].fp = nil;
                }
-       tempfree(x);
+       if (istemp(x))
+               tfree(x);
        return(True);
 }
 
@@ -1714,13 +1805,11 @@ void closeall(void)
 
        for (i = 0; i < FOPEN_MAX; i++)
                if (files[i].fp) {
-                       if (ferror(files[i].fp))
-                               WARNING( "i/o error occurred on %s", files[i].fname );
                        if (files[i].mode == '|' || files[i].mode == LE)
                                stat = pclose(files[i].fp);
                        else
-                               stat = fclose(files[i].fp);
-                       if (stat == EOF)
+                               stat = Bterm(files[i].fp);
+                       if (stat < -1)
                                WARNING( "i/o error occurred while closing %s", files[i].fname );
                }
 }
@@ -1731,12 +1820,12 @@ void flush_all(void)
 
        for (i = 0; i < FOPEN_MAX; i++)
                if (files[i].fp)
-                       fflush(files[i].fp);
+                       Bflush(files[i].fp);
 }
 
 void backsub(char **pb_ptr, char **sptr_ptr);
 
-Cell *sub(Node **a, int nnn)   /* substitute command */
+Cell *sub(Node **a, int)       /* substitute command */
 {
        char *sptr, *pb, *q;
        Cell *x, *y, *result;
@@ -1744,7 +1833,7 @@ Cell *sub(Node **a, int nnn)      /* substitute command */
        void *p;
        int bufsz = recsize;
 
-       if ((buf = (char *) malloc(bufsz)) == NULL)
+       if ((buf = (char *) malloc(bufsz)) == nil)
                FATAL("out of memory in sub");
        x = execute(a[3]);      /* target string */
        t = getsval(x);
@@ -1753,7 +1842,8 @@ Cell *sub(Node **a, int nnn)      /* substitute command */
        else {
                y = execute(a[1]);
                p = compre(getsval(y));
-               tempfree(y);
+               if (istemp(y))
+                       tfree(y);
        }
        y = execute(a[2]);      /* replacement string */
        result = False;
@@ -1790,22 +1880,24 @@ Cell *sub(Node **a, int nnn)    /* substitute command */
                setsval(x, buf);        /* BUG: should be able to avoid copy */
                result = True;;
        }
-       tempfree(x);
-       tempfree(y);
+       if (istemp(x))
+               tfree(x);
+       if (istemp(y))
+               tfree(y);
        free(buf);
        return result;
 }
 
-Cell *gsub(Node **a, int nnn)  /* global substitute */
+Cell *gsub(Node **a, int)      /* global substitute */
 {
        Cell *x, *y;
-       char *rptr, *sptr, *t, *pb, *c;
+       char *rptr, *sptr, *t, *pb, *c, *s;
        char *buf;
        void *p;
        int mflag, num;
        int bufsz = recsize;
 
-       if ((buf = (char *)malloc(bufsz)) == NULL)
+       if ((buf = (char *)malloc(bufsz)) == nil)
                FATAL("out of memory in gsub");
        mflag = 0;      /* if mflag == 0, can replace empty string */
        num = 0;
@@ -1815,8 +1907,10 @@ Cell *gsub(Node **a, int nnn)    /* global substitute */
                p = (void *) a[1];      /* regular expression */
        else {
                y = execute(a[1]);
-               p = compre(getsval(y));
-               tempfree(y);
+               s = getsval(y);
+               p = compre(s);
+               if (istemp(y))
+                       tfree(y);
        }
        y = execute(a[2]);      /* replacement string */
        if (pmatch(p, t, c)) {
@@ -1886,8 +1980,10 @@ Cell *gsub(Node **a, int nnn)    /* global substitute */
                *pb = '\0';
                setsval(x, buf);        /* BUG: should be able to avoid copy + free */
        }
-       tempfree(x);
-       tempfree(y);
+       if (istemp(x))
+               tfree(x);
+       if (istemp(y))
+               tfree(y);
        x = gettemp();
        x->tval = NUM;
        x->fval = num;
index 48c7b886114ebe22c5c8e8c8c5f08de1e2528855..11a74c818928d12c8cc0e8550de9c45d27143c15 100644 (file)
@@ -22,12 +22,10 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 THIS SOFTWARE.
 ****************************************************************/
 
-#define        DEBUG
-#include <stdio.h>
-#include <math.h>
+#include <u.h>
+#include <libc.h>
 #include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
+#include <bio.h>
 #include "awk.h"
 #include "y.tab.h"
 
@@ -46,7 +44,7 @@ Awkfloat *NF;         /* number of fields in current record */
 Awkfloat *NR;          /* number of current record */
 Awkfloat *FNR;         /* number of current record in current file */
 char   **FILENAME;     /* current filename argument */
-Awkfloat *ARGC;                /* number of arguments from command line */
+Awkfloat *AARGC;               /* number of arguments from command line */
 char   **SUBSEP;       /* subscript separator for a[i,j,k]; default \034 */
 Awkfloat *RSTART;      /* start of re matched with ~; origin 1 (!) */
 Awkfloat *RLENGTH;     /* length of same */
@@ -101,12 +99,12 @@ void arginit(int ac, char **av)    /* set up ARGV and ARGC */
        int i;
        char temp[50];
 
-       ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
+       AARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
        cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
        ARGVtab = makesymtab(NSYMTAB);  /* could be (int) ARGC as well */
        cp->sval = (char *) ARGVtab;
        for (i = 0; i < ac; i++) {
-               sprintf(temp, "%d", i);
+               sprint(temp, "%d", i);
                if (is_number(*av))
                        setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
                else
@@ -124,7 +122,7 @@ void envinit(char **envp)   /* set up ENVIRON variable */
        ENVtab = makesymtab(NSYMTAB);
        cp->sval = (char *) ENVtab;
        for ( ; *envp; envp++) {
-               if ((p = strchr(*envp, '=')) == NULL)
+               if ((p = strchr(*envp, '=')) == nil)
                        continue;
                *p++ = 0;       /* split into two strings at = */
                if (is_number(p))
@@ -142,9 +140,9 @@ Array *makesymtab(int n)    /* make a new symbol table */
 
        ap = (Array *) malloc(sizeof(Array));
        tp = (Cell **) calloc(n, sizeof(Cell *));
-       if (ap == NULL || tp == NULL)
+       if (ap == nil || tp == nil)
                FATAL("out of space in makesymtab");
-       ap->nelem = 0;
+       ap->nelemt = 0;
        ap->size = n;
        ap->tab = tp;
        return(ap);
@@ -159,10 +157,10 @@ void freesymtab(Cell *ap) /* free a symbol table */
        if (!isarr(ap))
                return;
        tp = (Array *) ap->sval;
-       if (tp == NULL)
+       if (tp == nil)
                return;
        for (i = 0; i < tp->size; i++) {
-               for (cp = tp->tab[i]; cp != NULL; cp = temp) {
+               for (cp = tp->tab[i]; cp != nil; cp = temp) {
                        xfree(cp->nval);
                        if (freeable(cp))
                                xfree(cp->sval);
@@ -178,14 +176,14 @@ void freesymtab(Cell *ap) /* free a symbol table */
 void freeelem(Cell *ap, char *s)       /* free elem s from ap (i.e., ap["s"] */
 {
        Array *tp;
-       Cell *p, *prev = NULL;
+       Cell *p, *prev = nil;
        int h;
        
        tp = (Array *) ap->sval;
        h = hash(s, tp->size);
-       for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
+       for (p = tp->tab[h]; p != nil; prev = p, p = p->cnext)
                if (strcmp(s, p->nval) == 0) {
-                       if (prev == NULL)       /* 1st one */
+                       if (prev == nil)        /* 1st one */
                                tp->tab[h] = p->cnext;
                        else                    /* middle somewhere */
                                prev->cnext = p->cnext;
@@ -193,7 +191,7 @@ void freeelem(Cell *ap, char *s)    /* free elem s from ap (i.e., ap["s"] */
                                xfree(p->sval);
                        free(p->nval);
                        free(p);
-                       tp->nelem--;
+                       tp->nelemt--;
                        return;
                }
 }
@@ -203,13 +201,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
        int h;
        Cell *p;
 
-       if (n != NULL && (p = lookup(n, tp)) != NULL) {
-                  dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
+       if (n != nil && (p = lookup(n, tp)) != nil) {
+                  dprint( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
                        p, p->nval, p->sval, p->fval, p->tval) );
                return(p);
        }
        p = (Cell *) malloc(sizeof(Cell));
-       if (p == NULL)
+       if (p == nil)
                FATAL("out of space for symbol table at %s", n);
        p->nval = tostring(n);
        p->sval = s ? tostring(s) : tostring("");
@@ -217,13 +215,13 @@ Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
        p->tval = t;
        p->csub = CUNK;
        p->ctype = OCELL;
-       tp->nelem++;
-       if (tp->nelem > FULLTAB * tp->size)
+       tp->nelemt++;
+       if (tp->nelemt > FULLTAB * tp->size)
                rehash(tp);
        h = hash(n, tp->size);
        p->cnext = tp->tab[h];
        tp->tab[h] = p;
-          dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
+          dprint( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
                p, p->nval, p->sval, p->fval, p->tval) );
        return(p);
 }
@@ -244,7 +242,7 @@ void rehash(Array *tp)      /* rehash items in small table into big one */
 
        nsz = GROWTAB * tp->size;
        np = (Cell **) calloc(nsz, sizeof(Cell *));
-       if (np == NULL)         /* can't do it, but can keep running. */
+       if (np == nil)          /* can't do it, but can keep running. */
                return;         /* someone else will run out later. */
        for (i = 0; i < tp->size; i++) {
                for (cp = tp->tab[i]; cp; cp = op) {
@@ -265,10 +263,10 @@ Cell *lookup(char *s, Array *tp)  /* look for s in tp */
        int h;
 
        h = hash(s, tp->size);
-       for (p = tp->tab[h]; p != NULL; p = p->cnext)
+       for (p = tp->tab[h]; p != nil; p = p->cnext)
                if (strcmp(s, p->nval) == 0)
                        return(p);      /* found it */
-       return(NULL);                   /* not found */
+       return(nil);                    /* not found */
 }
 
 Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
@@ -282,7 +280,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f)      /* set float val of a Cell */
                fldno = atoi(vp->nval);
                if (fldno > *NF)
                        newfld(fldno);
-                  dprintf( ("setting field %d to %g\n", fldno, f) );
+                  dprint( ("setting field %d to %g\n", fldno, f) );
        } else if (isrec(vp)) {
                donefld = 0;    /* mark $1... invalid */
                donerec = 1;
@@ -291,7 +289,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f)      /* set float val of a Cell */
                xfree(vp->sval); /* free any previous string */
        vp->tval &= ~STR;       /* mark string invalid */
        vp->tval |= NUM;        /* mark number ok */
-          dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
+          dprint( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
        return vp->fval = f;
 }
 
@@ -310,7 +308,7 @@ char *setsval(Cell *vp, char *s)    /* set string val of a Cell */
        char *t;
        int fldno;
 
-          dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
+          dprint( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
        if ((vp->tval & (NUM | STR)) == 0)
                funnyvar(vp, "assign to");
        if (isfld(vp)) {
@@ -318,7 +316,7 @@ char *setsval(Cell *vp, char *s)    /* set string val of a Cell */
                fldno = atoi(vp->nval);
                if (fldno > *NF)
                        newfld(fldno);
-                  dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
+                  dprint( ("setting field %d to %s (%p)\n", fldno, s, s) );
        } else if (isrec(vp)) {
                donefld = 0;    /* mark $1... invalid */
                donerec = 1;
@@ -329,7 +327,7 @@ char *setsval(Cell *vp, char *s)    /* set string val of a Cell */
        if (freeable(vp))
                xfree(vp->sval);
        vp->tval &= ~DONTFREE;
-          dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
+          dprint( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
        return(vp->sval = t);
 }
 
@@ -346,7 +344,7 @@ Awkfloat getfval(Cell *vp)  /* get float val of a Cell */
                if (is_number(vp->sval) && !(vp->tval&CON))
                        vp->tval |= NUM;        /* make NUM only sparingly */
        }
-          dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
+          dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
        return(vp->fval);
 }
 
@@ -365,14 +363,14 @@ char *getsval(Cell *vp)   /* get string val of a Cell */
                if (freeable(vp))
                        xfree(vp->sval);
                if (modf(vp->fval, &dtemp) == 0)        /* it's integral */
-                       sprintf(s, "%.30g", vp->fval);
+                       sprint(s, "%.30g", vp->fval);
                else
-                       sprintf(s, *CONVFMT, vp->fval);
+                       sprint(s, *CONVFMT, vp->fval);
                vp->sval = tostring(s);
                vp->tval &= ~DONTFREE;
                vp->tval |= STR;
        }
-          dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
+          dprint( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
        return(vp->sval);
 }
 
@@ -381,7 +379,7 @@ char *tostring(char *s)     /* make a copy of string s */
        char *p;
 
        p = (char *) malloc(strlen(s)+1);
-       if (p == NULL)
+       if (p == nil)
                FATAL("out of space in tostring on %s", s);
        strcpy(p, s);
        return(p);
@@ -393,7 +391,7 @@ char *qstring(char *s, int delim)   /* collect string up to next delim */
        int c, n;
        char *buf, *bp;
 
-       if ((buf = (char *) malloc(strlen(s)+3)) == NULL)
+       if ((buf = (char *) malloc(strlen(s)+3)) == nil)
                FATAL( "out of space in qstring(%s)", s);
        for (bp = buf; (c = *s) != delim; s++) {
                if (c == '\n')
@@ -429,6 +427,6 @@ char *qstring(char *s, int delim)   /* collect string up to next delim */
                        }
                }
        }
-       *bp++ = 0;
+       *bp = 0;
        return buf;
 }
index 91fb90dc36067b9aed4ffdcbfabc550ab9c5ff67..b3f843ac3e513a76da1fe1a74245e769c130eee8 100644 (file)
@@ -1,7 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
 #include "hash.h"
 
 Hash hash;
index a040429065e7346fb2fbb8200e93b4dafec28669..e4ebad7369f23c33a70703328dc3976856aa66aa 100644 (file)
@@ -1,7 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
 #include "hash.h"
 
 enum
index 533d5958a2e12d3c0e6b2cff8f774a4d2048b9b7..9c64c8cc25ea9077a3056d62d96f0874deb025fa 100644 (file)
@@ -2,8 +2,8 @@
 #include <libc.h>
 #include <bin.h>
 #include <bio.h>
-#include <regexp.h>
-#include "/sys/src/libregexp/regcomp.h"
+#include "regexp.h"
+#include "regcomp.h"
 #include "dfa.h"
 
 void rdump(Reprog*);
index 9c7babd35d71917ba6233ea06406a16248c2277f..3753a90958db119402b252edd2cea975583fe033 100644 (file)
@@ -1,7 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
 #include "/sys/src/libregexp/regcomp.h"
 #include "dfa.h"
 
index cbba30dc8fa317ea4911cca7368cb385e6c18802..9f640df041d5f17df2c02d45be969fbd70850f1c 100644 (file)
@@ -7,7 +7,7 @@
 #include <u.h>
 #include <libc.h>
 #include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
 #include <ctype.h>
 #include "dfa.h"
 
index 8afaf2157fe1ece7320b323f44d100f994879910..1ef57a4cb77c065e782eb8ab3ad51864ae05a4fe 100644 (file)
@@ -4,7 +4,7 @@
 #include <u.h>
 #include <libc.h>
 #include "regexp.h"
-#include "/sys/src/libregexp/regcomp.h"
+#include "regcomp.h"
 
 #define        TRUE    1
 #define        FALSE   0
diff --git a/sys/src/cmd/upas/bayes/regcomp.h b/sys/src/cmd/upas/bayes/regcomp.h
new file mode 100644 (file)
index 0000000..402fe7d
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ *  substitution list
+ */
+#define NSUBEXP 32
+typedef struct Resublist       Resublist;
+struct Resublist
+{
+       Resub   m[NSUBEXP];
+};
+
+/*
+ * Actions and Tokens (Reinst types)
+ *
+ *     02xx are operators, value == precedence
+ *     03xx are tokens, i.e. operands for operators
+ */
+#define RUNE           0177
+#define        OPERATOR        0200    /* Bitmask of all operators */
+#define        START           0200    /* Start, used for marker on stack */
+#define        RBRA            0201    /* Right bracket, ) */
+#define        LBRA            0202    /* Left bracket, ( */
+#define        OR              0203    /* Alternation, | */
+#define        CAT             0204    /* Concatentation, implicit operator */
+#define        STAR            0205    /* Closure, * */
+#define        PLUS            0206    /* a+ == aa* */
+#define        QUEST           0207    /* a? == a|nothing, i.e. 0 or 1 a's */
+#define        ANY             0300    /* Any character except newline, . */
+#define        ANYNL           0301    /* Any character including newline, . */
+#define        NOP             0302    /* No operation, internal use only */
+#define        BOL             0303    /* Beginning of line, ^ */
+#define        EOL             0304    /* End of line, $ */
+#define        CCLASS          0305    /* Character class, [] */
+#define        NCCLASS         0306    /* Negated character class, [] */
+#define        END             0377    /* Terminate: match found */
+
+/*
+ *  regexec execution lists
+ */
+#define LISTSIZE       10
+#define BIGLISTSIZE    (25*LISTSIZE)
+typedef struct Relist  Relist;
+struct Relist
+{
+       Reinst*         inst;           /* Reinstruction of the thread */
+       Resublist       se;             /* matched subexpressions in this thread */
+};
+typedef struct Reljunk Reljunk;
+struct Reljunk
+{
+       Relist* relist[2];
+       Relist* reliste[2];
+       int     starttype;
+       Rune    startchar;
+       char*   starts;
+       char*   eol;
+       Rune*   rstarts;
+       Rune*   reol;
+};
+
+extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
+extern void    _renewmatch(Resub*, int, Resublist*);
+extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
+extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
index 4f550095c82acf7252d589f075bc9f945b861d30..9bc7bdc8501237e45d359a108e43c02c33db8c14 100644 (file)
@@ -1,7 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include <bio.h>
-#include <regexp.h>
+#include "regexp.h"
 #include "dfa.h"
 
 /***
diff --git a/sys/src/cmd/upas/bayes/regexp.h b/sys/src/cmd/upas/bayes/regexp.h
new file mode 100644 (file)
index 0000000..780dc80
--- /dev/null
@@ -0,0 +1,66 @@
+#pragma        src     "/sys/src/oldlibregexp"
+#pragma        lib     "oldlibregexp.a"
+
+typedef struct Resub           Resub;
+typedef struct Reclass         Reclass;
+typedef struct Reinst          Reinst;
+typedef struct Reprog          Reprog;
+
+/*
+ *     Sub expression matches
+ */
+struct Resub{
+       union
+       {
+               char *sp;
+               Rune *rsp;
+       };
+       union
+       {
+               char *ep;
+               Rune *rep;
+       };
+};
+
+/*
+ *     character class, each pair of rune's defines a range
+ */
+struct Reclass{
+       Rune    *end;
+       Rune    spans[64];
+};
+
+/*
+ *     Machine instructions
+ */
+struct Reinst{
+       int     type;
+       union   {
+               Reclass *cp;            /* class pointer */
+               Rune    r;              /* character */
+               int     subid;          /* sub-expression id for RBRA and LBRA */
+               Reinst  *right;         /* right child of OR */
+       };
+       union { /* regexp relies on these two being in the same union */
+               Reinst *left;           /* left child of OR */
+               Reinst *next;           /* next instruction for CAT & LBRA */
+       };
+};
+
+/*
+ *     Reprogram definition
+ */
+struct Reprog{
+       Reinst  *startinst;     /* start pc */
+       Reclass class[16];      /* .data */
+       Reinst  firstinst[5];   /* .text */
+};
+
+extern Reprog  *regcomp(char*);
+extern Reprog  *regcomplit(char*);
+extern Reprog  *regcompnl(char*);
+extern void    regerror(char*);
+extern int     regexec(Reprog*, char*, Resub*, int);
+extern void    regsub(char*, char*, int, Resub*, int);
+extern int     rregexec(Reprog*, Rune*, Resub*, int);
+extern void    rregsub(Rune*, Rune*, int, Resub*, int);
index dcc457028d1b00db97e2362513c8061bc4f794bf..1b5cc7ddfe02154ea153c65819ad421b0eb65fa5 100644 (file)
@@ -6,12 +6,12 @@ OFILES=\
        regerror.$O\
        regexec.$O\
        regsub.$O\
-       regaux.$O\
        rregexec.$O\
        rregsub.$O\
+       regprint.$O\
 
 HFILES=/sys/include/regexp.h\
-       regcomp.h\
+       regimpl.h\
 
 UPDATE=\
        mkfile\
@@ -20,9 +20,3 @@ UPDATE=\
        ${LIB:/$objtype/%=/386/%}\
 
 </sys/src/cmd/mksyslib
-
-test: test.$O $OFILES
-       $LD -o test $prereq
-
-test2: test2.$O $OFILES
-       $LD -o test2 $prereq
diff --git a/sys/src/libregexp/regaux.c b/sys/src/libregexp/regaux.c
deleted file mode 100644 (file)
index a7d52ec..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <u.h>
-#include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-
-/*
- *  save a new match in mp
- */
-extern void
-_renewmatch(Resub *mp, int ms, Resublist *sp)
-{
-       int i;
-
-       if(mp==0 || ms<=0)
-               return;
-       if(mp[0].sp==0 || sp->m[0].sp<mp[0].sp ||
-          (sp->m[0].sp==mp[0].sp && sp->m[0].ep>mp[0].ep)){
-               for(i=0; i<ms && i<NSUBEXP; i++)
-                       mp[i] = sp->m[i];
-               for(; i<ms; i++)
-                       mp[i].sp = mp[i].ep = 0;
-       }
-}
-
-/*
- * Note optimization in _renewthread:
- *     *lp must be pending when _renewthread called; if *l has been looked
- *             at already, the optimization is a bug.
- */
-extern Relist*
-_renewthread(Relist *lp,       /* _relist to add to */
-       Reinst *ip,             /* instruction to add */
-       int ms,
-       Resublist *sep)         /* pointers to subexpressions */
-{
-       Relist *p;
-
-       for(p=lp; p->inst; p++){
-               if(p->inst == ip){
-                       if(sep->m[0].sp < p->se.m[0].sp){
-                               if(ms > 1)
-                                       p->se = *sep;
-                               else
-                                       p->se.m[0] = sep->m[0];
-                       }
-                       return 0;
-               }
-       }
-       p->inst = ip;
-       if(ms > 1)
-               p->se = *sep;
-       else
-               p->se.m[0] = sep->m[0];
-       (++p)->inst = 0;
-       return p;
-}
-
-/*
- * same as renewthread, but called with
- * initial empty start pointer.
- */
-extern Relist*
-_renewemptythread(Relist *lp,  /* _relist to add to */
-       Reinst *ip,             /* instruction to add */
-       int ms,
-       char *sp)               /* pointers to subexpressions */
-{
-       Relist *p;
-
-       for(p=lp; p->inst; p++){
-               if(p->inst == ip){
-                       if(sp < p->se.m[0].sp) {
-                               if(ms > 1)
-                                       memset(&p->se, 0, sizeof(p->se));
-                               p->se.m[0].sp = sp;
-                       }
-                       return 0;
-               }
-       }
-       p->inst = ip;
-       if(ms > 1)
-               memset(&p->se, 0, sizeof(p->se));
-       p->se.m[0].sp = sp;
-       (++p)->inst = 0;
-       return p;
-}
-
-extern Relist*
-_rrenewemptythread(Relist *lp, /* _relist to add to */
-       Reinst *ip,             /* instruction to add */
-       int ms,
-       Rune *rsp)              /* pointers to subexpressions */
-{
-       Relist *p;
-
-       for(p=lp; p->inst; p++){
-               if(p->inst == ip){
-                       if(rsp < p->se.m[0].rsp) {
-                               if(ms > 1)
-                                       memset(&p->se, 0, sizeof(p->se));
-                               p->se.m[0].rsp = rsp;
-                       }
-                       return 0;
-               }
-       }
-       p->inst = ip;
-       if(ms > 1)
-               memset(&p->se, 0, sizeof(p->se));
-       p->se.m[0].rsp = rsp;
-       (++p)->inst = 0;
-       return p;
-}
index e48ef15acb58b69f089100ce2bad237316e1d544..c937ee5c9a007d399127ad312f5eec22a9bd8840 100644 (file)
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
-
-#define        TRUE    1
-#define        FALSE   0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
-       Reinst* first;
-       Reinst* last;
-}Node;
-
-/* max character classes per program is nelem(reprog->class) */
-static Reprog  *reprog;
-
-/* max rune ranges per character class is nelem(classp->spans)/2 */
-#define NCCRUNE        nelem(classp->spans)
-
-#define        NSTACK  20
-static Node    andstack[NSTACK];
-static Node    *andp;
-static int     atorstack[NSTACK];
-static int*    atorp;
-static int     cursubid;               /* id of current subexpression */
-static int     subidstack[NSTACK];     /* parallel to atorstack */
-static int*    subidp;
-static int     lastwasand;     /* Last token was operand */
-static int     nbra;
-static char*   exprp;          /* pointer to next character in source expression */
-static int     lexdone;
-static int     nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int     errors;
-static Rune    yyrune;         /* last lex'd rune */
-static Reclass*yyclassp;       /* last lex'd class */
-
-/* predeclared crap */
-static void    operator(int);
-static void    pushand(Reinst*, Reinst*);
-static void    pushator(int);
-static void    evaluntil(int);
-static int     bldcclass(void);
-
-static jmp_buf regkaboom;
-
-static void
-rcerror(char *s)
+#include <regexp.h>
+#include "regimpl.h"
+
+int instrcnt[] = {
+       [TANY]    2,
+       [TBOL]    1,
+       [TCAT]    0,
+       [TCLASS]  1,
+       [TEOL]    1,
+       [TNOTNL]  1,
+       [TOR]     2,
+       [TPLUS]   1,
+       [TQUES]   1,
+       [TRUNE]   1,
+       [TSTAR]   2,
+       [TSUB]    2
+};
+
+static Renode*
+node(Parselex *plex, int op, Renode *l, Renode *r)
 {
-       errors++;
-       regerror(s);
-       longjmp(regkaboom, 1);
+       Renode *n;
+
+       plex->instrs += instrcnt[op];
+       n = plex->next++;
+       n->op = op;
+       n->left = l;
+       n->right = r;
+       return n;
 }
 
-static Reinst*
-newinst(int t)
+static Renode*
+e3(Parselex *plex)
 {
-       freep->type = t;
-       freep->left = 0;
-       freep->right = 0;
-       return freep++;
+       char error[128];
+       Renode *n;
+
+       switch(lex(plex)) {
+       case LANY:
+               return node(plex, TANY, nil, nil);
+       case LBOL:
+               return node(plex, TBOL, nil, nil);
+       case LEOL:
+               return node(plex, TEOL, nil, nil);
+       case LRUNE:
+               n = node(plex, TRUNE, nil, nil);
+               n->r = plex->rune;
+               return n;
+       case LCLASS:
+               if(plex->nc)
+                       return buildclassn(plex);
+               return buildclass(plex);
+       case LLPAR:
+               n = e0(plex);
+               n = node(plex, TSUB, n, nil);
+               if(lex(plex) != LRPAR) {
+                       snprint(error, sizeof(error), "regexp %s: no matching parenthesis", plex->orig);
+                       regerror(error);
+                       longjmp(plex->exitenv, 1);
+               }
+               return n;
+       default:
+               if(plex->rune)
+                       snprint(error, sizeof(error), "regexp %s: syntax error: %C", plex->orig, plex->rune);
+               else
+                       snprint(error, sizeof(error), "regexp %s: parsing error", plex->orig);
+               regerror(error);
+               longjmp(plex->exitenv, 1);
+       }
+       return nil;
 }
 
-static void
-operand(int t)
+static Renode*
+e2(Parselex *plex)
 {
-       Reinst *i;
-
-       if(lastwasand)
-               operator(CAT);  /* catenate is implicit */
-       i = newinst(t);
-
-       if(t == CCLASS || t == NCCLASS)
-               i->cp = yyclassp;
-       if(t == RUNE)
-               i->r = yyrune;
-
-       pushand(i, i);
-       lastwasand = TRUE;
+       Renode *n;
+
+       n = e3(plex);
+       if(lex(plex) == LREP) {
+               switch(plex->rune) {
+               case L'*':
+                       return node(plex, TSTAR, n, nil);
+               case L'+':
+                       return node(plex, TPLUS, n, nil);
+               case L'?':
+                       return node(plex, TQUES, n, nil);
+               }
+       }
+       plex->peek = 1;
+       return n;
 }
 
-static void
-operator(int t)
+static Renode*
+invert(Renode *n)
 {
-       if(t==RBRA && --nbra<0)
-               rcerror("unmatched right paren");
-       if(t==LBRA){
-               if(++cursubid >= NSUBEXP)
-                       rcerror ("too many subexpressions");
-               nbra++;
-               if(lastwasand)
-                       operator(CAT);
-       } else
-               evaluntil(t);
-       if(t != RBRA)
-               pushator(t);
-       lastwasand = FALSE;
-       if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
-               lastwasand = TRUE;      /* these look like operands */
+       Renode *n1;
+
+       if(n->op != TCAT)
+               return n;
+       while(n->left->op == TCAT) {
+               n1 = n->left;
+               n->left = n1->right;
+               n1->right = n;
+               n = n1;
+       }
+       return n;
 }
 
-static void
-regerr2(char *s, int c)
+static Renode*
+e1(Parselex *plex)
 {
-       char buf[100];
-       char *cp = buf;
-       while(*s)
-               *cp++ = *s++;
-       *cp++ = c;
-       *cp = '\0'; 
-       rcerror(buf);
+       Renode *n;
+       int sym;
+
+       n = e2(plex);
+       for(;;) {
+               sym = lex(plex);
+               if(sym == LEND || sym == LOR || sym == LRPAR)
+                       break;
+               plex->peek = 1;
+               n = node(plex, TCAT, n, e2(plex));
+       }
+       plex->peek = 1;
+       return invert(n);
 }
 
-static void
-cant(char *s)
+static Renode*
+e0(Parselex *plex)
 {
-       char buf[100];
-       strcpy(buf, "can't happen: ");
-       strcat(buf, s);
-       rcerror(buf);
+       Renode *n;
+
+       n = e1(plex);
+       for(;;) {
+               if(lex(plex) != LOR)
+                       break;
+               n = node(plex, TOR, n, e1(plex));
+       }
+       plex->peek = 1;
+       return n;
 }
 
-static void
-pushand(Reinst *f, Reinst *l)
+static Parselex*
+initplex(Parselex *plex, char *regstr, int lit)
 {
-       if(andp >= &andstack[NSTACK])
-               cant("operand stack overflow");
-       andp->first = f;
-       andp->last = l;
-       andp++;
+       plex->getnextr = lit ? getnextrlit : getnextr;
+       plex->rawexp = plex->orig = regstr;
+       plex->sub = 0;
+       plex->instrs = 0;
+       plex->peek = 0;
+       plex->done = 0;
+       return plex;
 }
 
-static void
-pushator(int t)
+static int
+maxthreads(Renode *tree)
 {
-       if(atorp >= &atorstack[NSTACK])
-               cant("operator stack overflow");
-       *atorp++ = t;
-       *subidp++ = cursubid;
+       tree = tree->left;
+       if(tree->op == TCAT)
+               tree = tree->left;
+       if(tree->op == TBOL)
+               return 2;
+       return -1;
 }
 
-static Node*
-popand(int op)
+static Reprog*
+regcomp1(char *regstr, int nl, int lit)
 {
-       Reinst *inst;
-
-       if(andp <= &andstack[0]){
-               regerr2("missing operand for ", op);
-               inst = newinst(NOP);
-               pushand(inst,inst);
+       Reprog *reprog;
+       Parselex plex;
+       Renode *parsetr;
+       int regstrlen, maxthr;
+
+       regstrlen = utflen(regstr);
+       initplex(&plex, regstr, lit);
+       plex.nodes = calloc(sizeof(*plex.nodes), regstrlen*2);
+       if(plex.nodes == nil)
+               return nil;
+       plex.next = plex.nodes;
+
+       if(setjmp(plex.exitenv) != 0) {
+               free(plex.nodes);
+               return nil;
        }
-       return --andp;
+
+       maxthr = regstrlen;
+       parsetr = node(&plex, TSUB, e0(&plex), nil);
+
+//     prtree(parsetr, 0, 1);
+       reprog = malloc(sizeof(Reprog) +
+                       sizeof(Reinst) * plex.instrs +
+                       sizeof(Rethread) * maxthr);
+       reprog->len = plex.instrs;
+       reprog->nthr = maxthr;
+       reprog->startinst = compile(parsetr, reprog, nl);
+       reprog->threads = (Rethread*)(reprog->startinst + reprog->len);
+       reprog->regstr = regstr;
+
+       free(plex.nodes);
+       return reprog;
 }
 
-static int
-popator(void)
+Reprog*
+regcomp(char *str)
 {
-       if(atorp <= &atorstack[0])
-               cant("operator stack underflow");
-       --subidp;
-       return *--atorp;
+       return regcomp1(str, 0, 0);
 }
 
-static void
-evaluntil(int pri)
+Reprog*
+regcomplit(char *str)
 {
-       Node *op1, *op2;
-       Reinst *inst1, *inst2;
-
-       while(pri==RBRA || atorp[-1]>=pri){
-               switch(popator()){
-               default:
-                       rcerror("unknown operator in evaluntil");
-                       break;
-               case LBRA:              /* must have been RBRA */
-                       op1 = popand('(');
-                       inst2 = newinst(RBRA);
-                       inst2->subid = *subidp;
-                       op1->last->next = inst2;
-                       inst1 = newinst(LBRA);
-                       inst1->subid = *subidp;
-                       inst1->next = op1->first;
-                       pushand(inst1, inst2);
-                       return;
-               case OR:
-                       op2 = popand('|');
-                       op1 = popand('|');
-                       inst2 = newinst(NOP);
-                       op2->last->next = inst2;
-                       op1->last->next = inst2;
-                       inst1 = newinst(OR);
-                       inst1->right = op1->first;
-                       inst1->left = op2->first;
-                       pushand(inst1, inst2);
-                       break;
-               case CAT:
-                       op2 = popand(0);
-                       op1 = popand(0);
-                       op1->last->next = op2->first;
-                       pushand(op1->first, op2->last);
-                       break;
-               case STAR:
-                       op2 = popand('*');
-                       inst1 = newinst(OR);
-                       op2->last->next = inst1;
-                       inst1->right = op2->first;
-                       pushand(inst1, inst1);
-                       break;
-               case PLUS:
-                       op2 = popand('+');
-                       inst1 = newinst(OR);
-                       op2->last->next = inst1;
-                       inst1->right = op2->first;
-                       pushand(op2->first, inst1);
-                       break;
-               case QUEST:
-                       op2 = popand('?');
-                       inst1 = newinst(OR);
-                       inst2 = newinst(NOP);
-                       inst1->left = inst2;
-                       inst1->right = op2->first;
-                       op2->last->next = inst2;
-                       pushand(inst1, inst2);
-                       break;
-               }
-       }
+       return regcomp1(str, 0, 1);
 }
 
-static Reprog*
-optimize(Reprog *pp)
+Reprog*
+regcompnl(char *str)
 {
-       Reinst *inst, *target;
-       int size;
-       Reprog *npp;
-       Reclass *cl;
-       int diff;
-
-       /*
-        *  get rid of NOOP chains
-        */
-       for(inst=pp->firstinst; inst->type!=END; inst++){
-               target = inst->next;
-               while(target->type == NOP)
-                       target = target->next;
-               inst->next = target;
-       }
-
-       /*
-        *  The original allocation is for an area larger than
-        *  necessary.  Reallocate to the actual space used
-        *  and then relocate the code.
-        */
-       size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
-       npp = realloc(pp, size);
-       if(npp==0 || npp==pp)
-               return pp;
-       diff = (char *)npp - (char *)pp;
-       freep = (Reinst *)((char *)freep + diff);
-       for(inst=npp->firstinst; inst<freep; inst++){
-               switch(inst->type){
-               case OR:
-               case STAR:
-               case PLUS:
-               case QUEST:
-                       *(char **)&inst->right += diff;
-                       break;
-               case CCLASS:
-               case NCCLASS:
-                       *(char **)&inst->right += diff;
-                       cl = inst->cp;
-                       *(char **)&cl->end += diff;
-                       break;
-               }
-               *(char **)&inst->left += diff;
-       }
-       *(char **)&npp->startinst += diff;
-       return npp;
+       return regcomp1(str, 1, 0);
 }
 
-#ifdef DEBUG
-static void
-dumpstack(void){
-       Node *stk;
-       int *ip;
-
-       print("operators\n");
-       for(ip=atorstack; ip<atorp; ip++)
-               print("0%o\n", *ip);
-       print("operands\n");
-       for(stk=andstack; stk<andp; stk++)
-               print("0%o\t0%o\n", stk->first->type, stk->last->type);
+static Reinst*
+compile1(Renode *renode, Reinst *reinst, int *sub, int nl)
+{
+       Reinst *i;
+       int s;
+
+Tailcall:
+       if(renode == nil)
+               return reinst;
+       switch(renode->op) {
+       case TCLASS:
+               reinst->op = OCLASS;
+               reinst->r = renode->r;
+               reinst->r1 = renode->r1;
+               reinst->a = reinst + 1 + renode->nclass;
+               renode = renode->left;
+               reinst++;
+               goto Tailcall;
+       case TCAT:
+               reinst = compile1(renode->left, reinst, sub, nl);
+               renode = renode->right;
+               goto Tailcall;
+       case TOR:
+               reinst->op = OSPLIT;
+               reinst->a = reinst + 1;
+               i = compile1(renode->left, reinst->a, sub, nl);
+               reinst->b = i + 1;
+               i->op = OJMP;
+               i->a = compile1(renode->right, reinst->b, sub, nl);
+               return i->a;
+       case TSTAR:
+               reinst->op = OSPLIT;
+               reinst->a = reinst + 1;
+               i = compile1(renode->left, reinst->a, sub, nl);
+               reinst->b = i + 1;
+               i->op = OJMP;
+               i->a = reinst;
+               return reinst->b;
+       case TPLUS:
+               i = reinst;
+               reinst = compile1(renode->left, reinst, sub, nl);
+               reinst->op = OSPLIT;
+               reinst->a = i;
+               reinst->b = reinst + 1;
+               return reinst->b;
+       case TQUES:
+               reinst->op = OSPLIT;
+               reinst->a = reinst + 1;
+               reinst->b = compile1(renode->left, reinst->a, sub, nl);
+               return reinst->b;
+       case TSUB:
+               reinst->op = OSAVE;
+               reinst->sub = s = (*sub)++;
+               reinst = compile1(renode->left, reinst+1, sub, nl);
+               reinst->op = OUNSAVE;
+               reinst->sub = s;
+               return reinst + 1;
+       case TANY:
+               if(nl == 0)
+                       reinst++->op = ONOTNL;
+               reinst->op = OANY;
+               return reinst + 1;
+       case TRUNE:
+               reinst->op = ORUNE;
+               reinst->r = renode->r;
+               return reinst + 1;
+       case TNOTNL:
+               reinst->op = ONOTNL;
+               return reinst + 1;
+       case TEOL:
+               reinst->op = OEOL;
+               return reinst + 1;
+       case TBOL:
+               reinst->op = OBOL;
+               return reinst + 1;
+       }
+       return nil;
 }
 
-static void
-dump(Reprog *pp)
+static Reinst*
+compile(Renode *parsetr, Reprog *reprog, int nl)
 {
-       Reinst *l;
-       Rune *p;
+       Reinst *reinst;
+       int sub;
 
-       l = pp->firstinst;
-       do{
-               print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
-                       l->left-pp->firstinst, l->right-pp->firstinst);
-               if(l->type == RUNE)
-                       print("\t%C\n", l->r);
-               else if(l->type == CCLASS || l->type == NCCLASS){
-                       print("\t[");
-                       if(l->type == NCCLASS)
-                               print("^");
-                       for(p = l->cp->spans; p < l->cp->end; p += 2)
-                               if(p[0] == p[1])
-                                       print("%C", p[0]);
-                               else
-                                       print("%C-%C", p[0], p[1]);
-                       print("]\n");
-               } else
-                       print("\n");
-       }while(l++->type);
+       sub = 0;
+       reinst = (Reinst*)(reprog+1);
+       compile1(parsetr, reinst, &sub, nl);
+       return reinst;
 }
-#endif
 
-static Reclass*
-newclass(void)
+static void
+getnextr(Parselex *l)
 {
-       if(nclass >= nelem(reprog->class))
-               rcerror("too many character classes; increase Reprog.class size");
-       return &(classp[nclass++]);
+       l->literal = 0;
+       if(l->done) {
+               l->rune = 0;
+               return;
+       }
+       l->rawexp += chartorune(&l->rune, l->rawexp);
+       if(l->rune == L'\\') {
+               l->rawexp += chartorune(&l->rune, l->rawexp);
+               l->literal = 1;
+       }
+       if(*l->rawexp == 0)
+               l->done = 1;
+       return;
 }
 
-static int
-nextc(Rune *rp)
+static void
+getnextrlit(Parselex *l)
 {
-       if(lexdone){
-               *rp = 0;
-               return 1;
-       }
-       exprp += chartorune(rp, exprp);
-       if(*rp == L'\\'){
-               exprp += chartorune(rp, exprp);
-               return 1;
+       l->literal = 1;
+       if(l->done) {
+               l->literal = 0;
+               l->rune = 0;
+               return;
        }
-       if(*rp == 0)
-               lexdone = 1;
-       return 0;
+       l->rawexp += chartorune(&l->rune, l->rawexp);
+       if(*l->rawexp == 0)
+               l->done = 1;
+       return;
 }
 
-static int
-lex(int literal, int dot_type)
+static int
+lex(Parselex *l)
 {
-       int quoted;
-
-       quoted = nextc(&yyrune);
-       if(literal || quoted){
-               if(yyrune == 0)
-                       return END;
-               return RUNE;
+       if(l->peek) {
+               l->peek = 0;
+               return l->peeklex;
        }
-
-       switch(yyrune){
+       l->getnextr(l);
+       if(l->literal)
+               return l->peeklex = LRUNE;
+       switch(l->rune){
        case 0:
-               return END;
+               return l->peeklex = LEND;
        case L'*':
-               return STAR;
        case L'?':
-               return QUEST;
        case L'+':
-               return PLUS;
+               return l->peeklex = LREP;
        case L'|':
-               return OR;
+               return l->peeklex = LOR;
        case L'.':
-               return dot_type;
+               return l->peeklex = LANY;
        case L'(':
-               return LBRA;
+               return l->peeklex = LLPAR;
        case L')':
-               return RBRA;
+               return l->peeklex = LRPAR;
        case L'^':
-               return BOL;
+               return l->peeklex = LBOL;
        case L'$':
-               return EOL;
+               return l->peeklex = LEOL;
        case L'[':
-               return bldcclass();
+               getclass(l);
+               return l->peeklex = LCLASS;
        }
-       return RUNE;
+       return l->peeklex = LRUNE;
 }
 
 static int
-bldcclass(void)
+pcmp(void *va, void *vb)
 {
-       int type;
-       Rune r[NCCRUNE];
-       Rune *p, *ep, *np;
-       Rune rune;
-       int quoted;
-
-       /* we have already seen the '[' */
-       type = CCLASS;
-       yyclassp = newclass();
-
-       /* look ahead for negation */
-       /* SPECIAL CASE!!! negated classes don't match \n */
-       ep = r;
-       quoted = nextc(&rune);
-       if(!quoted && rune == L'^'){
-               type = NCCLASS;
-               quoted = nextc(&rune);
-               *ep++ = L'\n';
-               *ep++ = L'\n';
-       }
-
-       /* parse class into a set of spans */
-       while(ep < &r[NCCRUNE-1]){
-               if(rune == 0){
-                       rcerror("malformed '[]'");
-                       return 0;
-               }
-               if(!quoted && rune == L']')
-                       break;
-               if(!quoted && rune == L'-'){
-                       if(ep == r){
-                               rcerror("malformed '[]'");
-                               return 0;
-                       }
-                       quoted = nextc(&rune);
-                       if((!quoted && rune == L']') || rune == 0){
-                               rcerror("malformed '[]'");
-                               return 0;
-                       }
-                       *(ep-1) = rune;
-               } else {
-                       *ep++ = rune;
-                       *ep++ = rune;
-               }
-               quoted = nextc(&rune);
-       }
-       if(ep >= &r[NCCRUNE-1]) {
-               rcerror("char class too large; increase Reclass.spans size");
-               return 0;
-       }
-
-       /* sort on span start */
-       for(p = r; p < ep; p += 2){
-               for(np = p; np < ep; np += 2)
-                       if(*np < *p){
-                               rune = np[0];
-                               np[0] = p[0];
-                               p[0] = rune;
-                               rune = np[1];
-                               np[1] = p[1];
-                               p[1] = rune;
-                       }
-       }
+       vlong n;
+       Rune *a, *b;
 
-       /* merge spans */
-       np = yyclassp->spans;
-       p = r;
-       if(r == ep)
-               yyclassp->end = np;
-       else {
-               np[0] = *p++;
-               np[1] = *p++;
-               for(; p < ep; p += 2)
-                       /* overlapping or adjacent ranges? */
-                       if(p[0] <= np[1] + 1){
-                               if(p[1] >= np[1])
-                                       np[1] = p[1];   /* coalesce */
-                       } else {
-                               np += 2;
-                               np[0] = p[0];
-                               np[1] = p[1];
-                       }
-               yyclassp->end = np+2;
-       }
+       a = va;
+       b = vb;
 
-       return type;
+       n = (vlong)b[0] - (vlong)a[0];
+       if(n)
+               return n;
+       return (vlong)b[1] - (vlong)a[1];
 }
 
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
+static void
+getclass(Parselex *l)
 {
-       int token;
-       Reprog *pp;
-
-       /* get memory for the program */
-       pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
-       if(pp == 0){
-               regerror("out of memory");
-               return 0;
+       Rune *p, *q, t;
+
+       l->nc = 0;
+       getnextrlit(l);
+       if(l->rune == L'^') {
+               l->nc = 1;
+               getnextrlit(l);
        }
-       freep = pp->firstinst;
-       classp = pp->class;
-       errors = 0;
-
-       if(setjmp(regkaboom))
-               goto out;
-
-       /* go compile the sucker */
-       lexdone = 0;
-       exprp = s;
-       nclass = 0;
-       nbra = 0;
-       atorp = atorstack;
-       andp = andstack;
-       subidp = subidstack;
-       lastwasand = FALSE;
-       cursubid = 0;
-
-       /* Start with a low priority operator to prime parser */
-       pushator(START-1);
-       while((token = lex(literal, dot_type)) != END){
-               if((token&0300) == OPERATOR)
-                       operator(token);
-               else
-                       operand(token);
+       p = l->cpairs;
+       for(;;) {
+               *p = l->rune;
+               if(l->rune == L']')
+                       break;
+               if(l->rune == L'-') {
+                       regerror("Malformed class");
+                       longjmp(l->exitenv, 1);
+               }
+               if(l->rune == '\\') {
+                       getnextrlit(l);
+                       *p = l->rune;
+               }
+               if(l->rune == 0) {
+                       regerror("No closing ] for class");
+                       longjmp(l->exitenv, 1);
+               }
+               getnextrlit(l);
+               if(l->rune == L'-') {
+                       getnextrlit(l);
+                       if(l->rune == L']') {
+                               regerror("Malformed class");
+                               longjmp(l->exitenv, 1);
+                       }
+                       if(l->rune == L'-') {
+                               regerror("Malformed class");
+                               longjmp(l->exitenv, 1);
+                       }
+                       if(l->rune == L'\\')
+                               getnextrlit(l);
+                       p[1] = l->rune;
+                       if(p[0] > p[1]) {
+                               t = p[0];
+                               p[0] = p[1];
+                               p[1] = t;
+                       }
+                       getnextrlit(l);
+               } else
+                       p[1] = p[0];
+               if(p >= l->cpairs + nelem(l->cpairs) - 2) {
+                       regerror("Class too big\n");
+                       longjmp(l->exitenv, 1);
+               }
+               p += 2;
        }
-
-       /* Close with a low priority operator */
-       evaluntil(START);
-
-       /* Force END */
-       operand(END);
-       evaluntil(START);
-#ifdef DEBUG
-       dumpstack();
-#endif
-       if(nbra)
-               rcerror("unmatched left paren");
-       --andp; /* points to first and only operand */
-       pp->startinst = andp->first;
-#ifdef DEBUG
-       dump(pp);
-#endif
-       pp = optimize(pp);
-#ifdef DEBUG
-       print("start: %d\n", andp->first-pp->firstinst);
-       dump(pp);
-#endif
-out:
-       if(errors){
-               free(pp);
-               pp = 0;
+       *p = L'\0';
+       qsort(l->cpairs, (p - l->cpairs)/2, 2*sizeof(*l->cpairs), pcmp);
+       q = l->cpairs;
+       for(p = l->cpairs+2; *p != 0; p += 2) {
+               if(p[1] < q[0] - 1) {
+                       q[2] = p[0];
+                       q[3] = p[1];
+                       q += 2;
+                       continue;
+               }
+               q[0] = p[0];
+               if(p[1] > q[1])
+                       q[1] = p[1];
        }
-       return pp;
+       q[2] = 0;
 }
 
-extern Reprog*
-regcomp(char *s)
+/* classes are in descending order */
+static Renode*
+buildclassn(Parselex *l)
 {
-       return regcomp1(s, 0, ANY);
+       Renode *n;
+       Rune *p;
+       int i;
+
+       i = 0;
+       p = l->cpairs;
+       n = node(l, TCLASS, nil, nil);
+       n->r = p[1] + 1;
+       n->r1 = Runemax;
+       n->nclass = i++;
+
+       for(; *p != 0; p += 2) {
+               n = node(l, TCLASS, n, nil);
+               n->r = p[3] + 1;
+               n->r1 = p[0] - 1;
+               n->nclass = i++;
+       }
+       n->r = 0;
+       return node(l, TCAT, node(l, TNOTNL, nil, nil), n);
 }
 
-extern Reprog*
-regcomplit(char *s)
+static Renode*
+buildclass(Parselex *l)
 {
-       return regcomp1(s, 1, ANY);
+       Renode *n;
+       Rune *p;
+       int i;
+
+       i = 0;
+       n = node(l, TCLASS, nil, nil);
+       n->r = Runemax + 1;
+       n->nclass = i++;
+
+       for(p = l->cpairs; *p != 0; p += 2) {
+               n = node(l, TCLASS, n, nil);
+               n->r = p[0];
+               n->r1 = p[1];
+               n->nclass = i++;
+       }
+       return n;
 }
 
-extern Reprog*
-regcompnl(char *s)
+static void
+prtree(Renode *tree, int d, int f)
 {
-       return regcomp1(s, 0, ANYNL);
+       int i;
+
+       if(tree == nil)
+               return;
+       if(f)
+       for(i = 0; i < d; i++)
+               print("\t");
+       switch(tree->op) {
+       case TCAT:
+               prtree(tree->left, d, 0);
+               prtree(tree->right, d, 1);
+               break;
+       case TOR:
+               print("TOR\n");
+               prtree(tree->left, d+1, 1);
+               for(i = 0; i < d; i++)
+                       print("\t");
+               print("|\n");
+               prtree(tree->right, d+1, 1);
+               break;
+       case TSTAR:
+               print("*\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TPLUS:
+               print("+\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TQUES:
+               print("?\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TANY:
+               print(".\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TBOL:
+               print("^\n");
+               break;
+       case TEOL:
+               print("$\n");
+               break;
+       case TSUB:
+               print("TSUB\n");
+               prtree(tree->left, d+1, 1);
+               break;
+       case TRUNE:
+               print("TRUNE: %C\n", tree->r);
+               break;
+       case TNOTNL:
+               print("TNOTNL: !\\n\n");
+               break;
+       case TCLASS:
+               print("CLASS: %C-%C\n", tree->r, tree->r1);
+               prtree(tree->left, d, 1);
+               break;
+       }
 }
diff --git a/sys/src/libregexp/regcomp.h b/sys/src/libregexp/regcomp.h
deleted file mode 100644 (file)
index 402fe7d..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  substitution list
- */
-#define NSUBEXP 32
-typedef struct Resublist       Resublist;
-struct Resublist
-{
-       Resub   m[NSUBEXP];
-};
-
-/*
- * Actions and Tokens (Reinst types)
- *
- *     02xx are operators, value == precedence
- *     03xx are tokens, i.e. operands for operators
- */
-#define RUNE           0177
-#define        OPERATOR        0200    /* Bitmask of all operators */
-#define        START           0200    /* Start, used for marker on stack */
-#define        RBRA            0201    /* Right bracket, ) */
-#define        LBRA            0202    /* Left bracket, ( */
-#define        OR              0203    /* Alternation, | */
-#define        CAT             0204    /* Concatentation, implicit operator */
-#define        STAR            0205    /* Closure, * */
-#define        PLUS            0206    /* a+ == aa* */
-#define        QUEST           0207    /* a? == a|nothing, i.e. 0 or 1 a's */
-#define        ANY             0300    /* Any character except newline, . */
-#define        ANYNL           0301    /* Any character including newline, . */
-#define        NOP             0302    /* No operation, internal use only */
-#define        BOL             0303    /* Beginning of line, ^ */
-#define        EOL             0304    /* End of line, $ */
-#define        CCLASS          0305    /* Character class, [] */
-#define        NCCLASS         0306    /* Negated character class, [] */
-#define        END             0377    /* Terminate: match found */
-
-/*
- *  regexec execution lists
- */
-#define LISTSIZE       10
-#define BIGLISTSIZE    (25*LISTSIZE)
-typedef struct Relist  Relist;
-struct Relist
-{
-       Reinst*         inst;           /* Reinstruction of the thread */
-       Resublist       se;             /* matched subexpressions in this thread */
-};
-typedef struct Reljunk Reljunk;
-struct Reljunk
-{
-       Relist* relist[2];
-       Relist* reliste[2];
-       int     starttype;
-       Rune    startchar;
-       char*   starts;
-       char*   eol;
-       Rune*   rstarts;
-       Rune*   reol;
-};
-
-extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
-extern void    _renewmatch(Resub*, int, Resublist*);
-extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
-extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
index 92b73a0d9188b78083aa32f6a7a733b619416c8d..1e0eef175eaf7f922500a123c24a19b0248b182e 100644 (file)
@@ -1,6 +1,6 @@
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
+#include <regexp.h>
 
 void
 regerror(char *s)
index b806ba60a384f94101323f8a12a1a25868551f2d..89501de6a843c39ffe2037fd4986a87f79ff2501 100644 (file)
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
+#include <regexp.h>
+#include "regimpl.h"
 
+typedef struct RethreadQ RethreadQ;
+struct RethreadQ
+{
+       Rethread *head;
+       Rethread **tail;
+};
 
-/*
- *  return     0 if no match
- *             >0 if a match
- *             <0 if we ran out of _relist space
- */
-static int
-regexec1(Reprog *progp,        /* program to run */
-       char *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms,         /* number of elements at mp */
-       Reljunk *j
-)
+int
+regexec(Reprog *prog, char *str, Resub *sem, int msize)
 {
-       int flag=0;
-       Reinst *inst;
-       Relist *tlp;
-       char *s;
-       int i, checkstart;
-       Rune r, *rp, *ep;
-       int n;
-       Relist* tl;             /* This list, next list */
-       Relist* nl;
-       Relist* tle;            /* ends of this and next list */
-       Relist* nle;
-       int match;
-       char *p;
+       RethreadQ lists[2], *clist, *nlist, *tmp;
+       Rethread *t, *next, *pooltop, *avail;
+       Reinst *curinst;
+       Rune r;
+       char *sp, *ep, endc;
+       int i, match, first, gen, matchpri, pri;
 
-       match = 0;
-       checkstart = j->starttype;
-       if(mp)
-               for(i=0; i<ms; i++) {
-                       mp[i].sp = 0;
-                       mp[i].ep = 0;
-               }
-       j->relist[0][0].inst = 0;
-       j->relist[1][0].inst = 0;
+       if(msize > NSUBEXPM)
+               msize = NSUBEXPM;
 
-       /* Execute machine once for each character, including terminal NUL */
-       s = j->starts;
-       do{
-               /* fast check for first char */
-               if(checkstart) {
-                       switch(j->starttype) {
-                       case RUNE:
-                               p = utfrune(s, j->startchar);
-                               if(p == 0 || s == j->eol)
-                                       return match;
-                               s = p;
-                               break;
-                       case BOL:
-                               if(s == bol)
-                                       break;
-                               p = utfrune(s, '\n');
-                               if(p == 0 || s == j->eol)
-                                       return match;
-                               s = p+1;
-                               break;
-                       }
-               }
-               r = *(uchar*)s;
-               if(r < Runeself)
-                       n = 1;
-               else
-                       n = chartorune(&r, s);
+       if(prog->startinst->gen != 0) {
+               for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
+                       curinst->gen = 0;
+       }
 
-               /* switch run lists */
-               tl = j->relist[flag];
-               tle = j->reliste[flag];
-               nl = j->relist[flag^=1];
-               nle = j->reliste[flag];
-               nl->inst = 0;
+       clist = lists;
+       clist->head = nil;
+       clist->tail = &clist->head;
+       nlist = lists + 1;
+       nlist->head = nil;
+       nlist->tail = &nlist->head;
 
-               /* Add first instruction to current list */
-               if(match == 0)
-                       _renewemptythread(tl, progp->startinst, ms, s);
+       pooltop = prog->threads + prog->nthr;
+       avail = nil;
 
-               /* Execute machine until current list is empty */
-               for(tlp=tl; tlp->inst; tlp++){  /* assignment = */
-                       for(inst = tlp->inst; ; inst = inst->next){
-                               switch(inst->type){
-                               case RUNE:      /* regular character */
-                                       if(inst->r == r){
-                                               if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       }
-                                       break;
-                               case LBRA:
-                                       tlp->se.m[inst->subid].sp = s;
-                                       continue;
-                               case RBRA:
-                                       tlp->se.m[inst->subid].ep = s;
-                                       continue;
-                               case ANY:
-                                       if(r != '\n')
-                                               if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case ANYNL:
-                                       if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case BOL:
-                                       if(s == bol || *(s-1) == '\n')
-                                               continue;
-                                       break;
-                               case EOL:
-                                       if(s == j->eol || r == 0 || r == '\n')
-                                               continue;
-                                       break;
-                               case CCLASS:
-                                       ep = inst->cp->end;
-                                       for(rp = inst->cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1]){
-                                                       if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                               return -1;
-                                                       break;
-                                               }
-                                       break;
-                               case NCCLASS:
-                                       ep = inst->cp->end;
-                                       for(rp = inst->cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1])
-                                                       break;
-                                       if(rp == ep)
-                                               if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case OR:
-                                       /* evaluate right choice later */
-                                       if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
-                                               return -1;
-                                       /* efficiency: advance and re-evaluate */
-                                       continue;
-                               case END:       /* Match! */
-                                       match = 1;
-                                       tlp->se.m[0].ep = s;
-                                       if(mp != 0)
-                                               _renewmatch(mp, ms, &tlp->se);
-                                       break;
+       pri = matchpri = gen = match = 0;
+       sp = str;
+       ep = nil;
+       endc = '\0';
+       if(sem != nil && msize > 0) {
+               if(sem->sp != nil)
+                       sp = sem->sp;
+               if(sem->ep != nil && *sem->ep != '\0') {
+                       ep = sem->ep;
+                       endc = *sem->ep;
+                       *sem->ep = '\0';
+               }
+       }
+       r = Runemax + 1; 
+       for(; r != L'\0'; sp += i) {
+               gen++;
+               i = chartorune(&r, sp);
+               first = 1;
+               t = clist->head;
+               if(t == nil)
+                       goto Start;
+               curinst = t->pc;
+Again:
+               if(curinst->gen == gen)
+                       goto Done;
+               curinst->gen = gen;
+               switch(curinst->op) {
+               case ORUNE:
+                       if(r != curinst->r)
+                               goto Done;
+               case OANY: /* fallthrough */
+               Any:
+                       next = t->next;
+                       t->pc = curinst + 1;
+                       t->next = nil;
+                       *nlist->tail = t;
+                       nlist->tail = &t->next;
+                       if(next == nil)
+                               break;
+                       t = next;
+                       curinst = t->pc;
+                       goto Again;
+               case OCLASS:
+               Class:
+                       if(r < curinst->r)
+                               goto Done;
+                       if(r > curinst->r1) {
+                               curinst++;
+                               goto Class;
+                       }
+                       next = t->next;
+                       t->pc = curinst->a;
+                       t->next = nil;
+                       *nlist->tail = t;
+                       nlist->tail = &t->next;
+                       if(next == nil)
+                               break;
+                       t = next;
+                       curinst = t->pc;
+                       goto Again;
+               case ONOTNL:
+                       if(r != L'\n') {
+                               curinst++;
+                               goto Again;
+                       }
+                       goto Done;
+               case OBOL:
+                       if(sp == str || sp[-1] == '\n') {
+                               curinst++;
+                               goto Again;
+                       }
+                       goto Done;
+               case OEOL:
+                       if(r == L'\0' && ep == nil) {
+                               curinst++;
+                               goto Again;
+                       }
+                       if(r == L'\n')
+                               goto Any;
+                       goto Done;
+               case OJMP:
+                       curinst = curinst->a;
+                       goto Again;
+               case OSPLIT:
+                       if(avail == nil)
+                               next = --pooltop;
+                       else {
+                               next = avail;
+                               avail = avail->next;
+                       }
+                       next->pc = curinst->b;
+                       if(msize > 0)
+                               memcpy(next->sem, t->sem, sizeof(Resub)*msize);
+                       next->pri = t->pri;
+                       next->next = t->next;
+                       t->next = next;
+                       curinst = curinst->a;
+                       goto Again;
+               case OSAVE:
+                       if(curinst->sub < msize)
+                               t->sem[curinst->sub].sp = sp;
+                       curinst++;
+                       goto Again;
+               case OUNSAVE:
+                       if(curinst->sub == 0) {
+                               /* "Highest" priority is the left-most longest. */
+                               if (t->pri > matchpri)
+                                       goto Done;
+                               match = 1;
+                               matchpri = t->pri;
+                               if(sem != nil && msize > 0) {
+                                       memcpy(sem, t->sem, sizeof(Resub)*msize);
+                                       sem->ep = sp;
                                }
+                               goto Done;
+                       }
+                       if(curinst->sub < msize)
+                               t->sem[curinst->sub].ep = sp;
+                       curinst++;
+                       goto Again;
+               Done:
+                       next = t->next;
+                       t->next = avail;
+                       avail = t;
+                       if(next == nil)
                                break;
+                       t = next;
+                       curinst = t->pc;
+                       goto Again;
+               }
+Start:
+               /* Start again once if we haven't found anything. */
+               if(first == 1 && match == 0) {
+                       first = 0;
+                       if(avail == nil)
+                               t = --pooltop;
+                       else {
+                               t = avail;
+                               avail = avail->next;
                        }
+                       if(msize > 0)
+                               memset(t->sem, 0, sizeof(Resub)*msize);
+                       /* "Lower" priority thread */
+                       t->pri = matchpri = pri++;
+                       t->next = nil;
+                       curinst = prog->startinst;
+                       goto Again;
                }
-               if(s == j->eol)
+               /* If we have a match and no extant threads, we are done. */
+               if(match == 1 && nlist->head == nil)
                        break;
-               checkstart = j->starttype && nl->inst==0;
-               s += n;
-       }while(r);
-       return match;
-}
-
-static int
-regexec2(Reprog *progp,        /* program to run */
-       char *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms,         /* number of elements at mp */
-       Reljunk *j
-)
-{
-       int rv;
-       Relist *relist0, *relist1;
-
-       /* mark space */
-       relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
-       if(relist0 == nil)
-               return -1;
-       relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
-       if(relist1 == nil){
-               free(relist1);
-               return -1;
-       }
-       j->relist[0] = relist0;
-       j->relist[1] = relist1;
-       j->reliste[0] = relist0 + BIGLISTSIZE - 2;
-       j->reliste[1] = relist1 + BIGLISTSIZE - 2;
-
-       rv = regexec1(progp, bol, mp, ms, j);
-       free(relist0);
-       free(relist1);
-       return rv;
-}
-
-extern int
-regexec(Reprog *progp, /* program to run */
-       char *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements at mp */
-{
-       Reljunk j;
-       Relist relist0[LISTSIZE], relist1[LISTSIZE];
-       int rv;
-
-       /*
-        *  use user-specified starting/ending location if specified
-        */
-       j.starts = bol;
-       j.eol = 0;
-       if(mp && ms>0){
-               if(mp->sp)
-                       j.starts = mp->sp;
-               if(mp->ep)
-                       j.eol = mp->ep;
+               tmp = clist;
+               clist = nlist;
+               nlist = tmp;
+               nlist->head = nil;
+               nlist->tail = &nlist->head;
        }
-       j.starttype = 0;
-       j.startchar = 0;
-       if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
-               j.starttype = RUNE;
-               j.startchar = progp->startinst->r;
-       }
-       if(progp->startinst->type == BOL)
-               j.starttype = BOL;
-
-       /* mark space */
-       j.relist[0] = relist0;
-       j.relist[1] = relist1;
-       j.reliste[0] = relist0 + nelem(relist0) - 2;
-       j.reliste[1] = relist1 + nelem(relist1) - 2;
-
-       rv = regexec1(progp, bol, mp, ms, &j);
-       if(rv >= 0)
-               return rv;
-       rv = regexec2(progp, bol, mp, ms, &j);
-       if(rv >= 0)
-               return rv;
-       return -1;
+       if(ep != nil)
+               *ep = endc;
+       return match;
 }
diff --git a/sys/src/libregexp/regimpl.h b/sys/src/libregexp/regimpl.h
new file mode 100644 (file)
index 0000000..f173a97
--- /dev/null
@@ -0,0 +1,104 @@
+enum
+{
+       LANY = 0,
+       LBOL,
+       LCLASS,
+       LEND,
+       LEOL,
+       LLPAR,
+       LOR,
+       LREP,
+       LRPAR,
+       LRUNE,
+
+       TANY = 0,
+       TBOL,
+       TCAT,
+       TCLASS,
+       TEOL,
+       TNOTNL,
+       TOR,
+       TPLUS,
+       TQUES,
+       TRUNE,
+       TSTAR,
+       TSUB,
+
+       NSUBEXPM = 32
+};
+
+typedef struct Parselex Parselex;
+typedef struct Renode Renode;
+
+struct Parselex
+{
+       /* Parse */
+       Renode *next;
+       Renode *nodes;
+       int sub;
+       int instrs;
+       jmp_buf exitenv;
+       /* Lex */
+       void (*getnextr)(Parselex*);
+       char *rawexp;
+       char *orig;
+       Rune rune;
+       Rune peek;
+       int peeklex;
+       int done;
+       int literal;
+       Rune cpairs[400+2];
+       int nc;
+};
+struct Renode
+{
+       int op;
+       Renode *left;
+       Rune r;
+       union
+       {
+               Rune r1;
+               int sub;
+               Renode *right;
+       };
+       int nclass;
+};
+struct Rethread
+{
+       Reinst *pc;
+       Resub sem[NSUBEXPM];
+       int pri;
+       Rethread *next;
+};
+struct Reinst
+{
+       char op;
+       int gen;
+       Reinst *a;
+       union
+       {
+               Rune r;
+               int sub;
+       };
+       union
+       {
+               Rune r1;
+               Reinst *b;
+       };
+};
+
+static int lex(Parselex*);
+static void getnextr(Parselex*);
+static void getnextrlit(Parselex*);
+static void getclass(Parselex*);
+static Renode *e0(Parselex*);
+static Renode *e1(Parselex*);
+static Renode *e2(Parselex*);
+static Renode *e3(Parselex*);
+static Renode *buildclass(Parselex*);
+static Renode *buildclassn(Parselex*);
+static int pcmp(void*, void*);
+static Reprog *regcomp1(char*, int, int);
+static Reinst *compile(Renode*, Reprog*, int);
+static Reinst *compile1(Renode*, Reinst*, int*, int);
+static void prtree(Renode*, int, int);
diff --git a/sys/src/libregexp/regprint.c b/sys/src/libregexp/regprint.c
new file mode 100644 (file)
index 0000000..673ca1f
--- /dev/null
@@ -0,0 +1,66 @@
+#include <u.h>
+#include <libc.h>
+#include <regexp.h>
+#include <regimpl.h>
+
+static int
+fmtprinst(Fmt *f, Reinst *inst)
+{
+       int r;
+
+       r = fmtprint(f, "%p ", inst);
+       switch(inst->op) {
+       case ORUNE:
+               r += fmtprint(f, "ORUNE\t%C\n", inst->r);
+               break;
+       case ONOTNL:
+               r += fmtprint(f, "ONOTNL\n");
+               break;
+       case OCLASS:
+               r += fmtprint(f, "OCLASS\t%C-%C %p\n", inst->r, inst->r1, inst->a);
+               break;
+       case OSPLIT:
+               r += fmtprint(f, "OSPLIT\t%p %p\n", inst->a, inst->b);
+               break;
+       case OJMP:
+               r += fmtprint(f, "OJMP \t%p\n", inst->a);
+               break;
+       case OSAVE:
+               r += fmtprint(f, "OSAVE\t%d\n", inst->sub);
+               break;
+       case OUNSAVE:
+               r += fmtprint(f, "OUNSAVE\t%d\n", inst->sub);
+               break;
+       case OANY:
+               r += fmtprint(f, "OANY \t.\n");
+               break;
+       case OEOL:
+               r += fmtprint(f, "OEOL \t$\n");
+               break;
+       case OBOL:
+               r += fmtprint(f, "OBOL \t^\n");
+               break;
+       }
+       return r;
+}
+
+static int
+fmtprprog(Fmt *f, Reprog *reprog)
+{
+       Reinst *inst;
+       int r;
+
+       r = 0;
+       for(inst = reprog->startinst; inst < reprog->startinst + reprog->len; inst++)
+               r += fmtprinst(f, inst);
+       return r;
+}
+
+int
+reprogfmt(Fmt *f)
+{
+       Reprog *r;
+
+       r = va_arg(f->args, Reprog*);
+       return fmtprprog(f, r);
+}
index 2474d29894d3a8ff84fe6164a336a78efc17113e..d11d86f25a47d3320b780dda5a3de8747106478e 100644 (file)
@@ -1,63 +1,66 @@
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
+#include <regexp.h>
 
-/* substitute into one string using the matches from the last regexec() */
-extern void
-regsub(char *sp,       /* source string */
-       char *dp,       /* destination string */
-       int dlen,
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements pointed to by mp */
+void
+regsub(char *src, char *dst, int dlen, Resub *match, int msize)
 {
-       char *ssp, *ep;
        int i;
+       char *ep, c;
 
-       ep = dp+dlen-1;
-       while(*sp != '\0'){
-               if(*sp == '\\'){
-                       switch(*++sp){
-                       case '0':
-                       case '1':
-                       case '2':
-                       case '3':
-                       case '4':
-                       case '5':
-                       case '6':
-                       case '7':
-                       case '8':
-                       case '9':
-                               i = *sp-'0';
-                               if(mp!=0 && mp[i].sp != 0 && ms>i)
-                                       for(ssp = mp[i].sp;
-                                            ssp < mp[i].ep;
-                                            ssp++)
-                                               if(dp < ep)
-                                                       *dp++ = *ssp;
-                               break;
-                       case '\\':
-                               if(dp < ep)
-                                       *dp++ = '\\';
-                               break;
-                       case '\0':
-                               sp--;
-                               break;
-                       default:
-                               if(dp < ep)
-                                       *dp++ = *sp;
-                               break;
+       ep = dst + dlen-1;
+       for(;*src != '\0'; src++) switch(*src) {
+       case '\\':
+               switch(*++src) {
+               case '0':
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+               case '8':
+               case '9':
+                       i = *src - '0';
+                       if(match != nil && i < msize && match[i].ep != nil) {
+                               c = *match[i].ep;
+                               *match[i].ep = '\0';
+                               dst = strecpy(dst, ep+1, match[i].sp);
+                               *match[i].ep = c;
                        }
-               }else if(*sp == '&'){
-                       if(mp!=0 && mp[0].sp != 0 && ms>0)
-                               for(ssp = mp[0].sp;
-                                    ssp < mp[0].ep; ssp++)
-                                       if(dp < ep)
-                                               *dp++ = *ssp;
-               }else{
-                       if(dp < ep)
-                               *dp++ = *sp;
+                       break;
+               case '\\':
+                       if(dst < ep)
+                               *dst++ = '\\';
+                       else
+                               goto End;
+                       break;
+               case '\0':
+                       goto End;
+               default:
+                       if(dst < ep)
+                               *dst++ = *src;
+                       else
+                               goto End;
+                       break;
                }
-               sp++;
+               break;
+       case '&':
+               if(match != nil && msize > 0 && match[0].sp != nil) {
+                       c = *match[0].ep;
+                       *match[0].ep = '\0';
+                       dst = strecpy(dst, ep+1, match[0].sp);
+                       *match[0].ep = c;
+               }
+               break;
+       default:
+               if(dst < ep)
+                       *dst++ = *src;
+               else
+                       goto End;
+               break;
        }
-       *dp = '\0';
+End:
+       *dst = '\0';
 }
index 301a3589b1ab93e0b2b85fa4a23c0d53dcf8f612..557580b9c58f2d72b1a1be7fdfa885fc6fb450c8 100644 (file)
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
-#include "regcomp.h"
+#include <regexp.h>
+#include "regimpl.h"
 
-/*
- *  return     0 if no match
- *             >0 if a match
- *             <0 if we ran out of _relist space
- */
-static int
-rregexec1(Reprog *progp,       /* program to run */
-       Rune *bol,              /* string to run machine on */
-       Resub *mp,              /* subexpression elements */
-       int ms,                 /* number of elements at mp */
-       Reljunk *j)
+typedef struct RethreadQ RethreadQ;
+struct RethreadQ
 {
-       int flag=0;
-       Reinst *inst;
-       Relist *tlp;
-       Rune *s;
-       int i, checkstart;
-       Rune r, *rp, *ep;
-       Relist* tl;             /* This list, next list */
-       Relist* nl;
-       Relist* tle;            /* ends of this and next list */
-       Relist* nle;
-       int match;
-       Rune *p;
+       Rethread *head;
+       Rethread **tail;
+};
 
-       match = 0;
-       checkstart = j->startchar;
-       if(mp)
-               for(i=0; i<ms; i++) {
-                       mp[i].rsp = 0;
-                       mp[i].rep = 0;
-               }
-       j->relist[0][0].inst = 0;
-       j->relist[1][0].inst = 0;
+int
+rregexec(Reprog *prog, Rune *str, Resub *sem, int msize)
+{
+       RethreadQ lists[2], *clist, *nlist, *tmp;
+       Rethread *t, *next, *pooltop, *avail;
+       Reinst *curinst;
+       Rune *rsp, *rep, endr, last;
+       int match, first, gen, pri, matchpri;
 
-       /* Execute machine once for each character, including terminal NUL */
-       s = j->rstarts;
-       do{
-               /* fast check for first char */
-               if(checkstart) {
-                       switch(j->starttype) {
-                       case RUNE:
-                               p = runestrchr(s, j->startchar);
-                               if(p == 0 || s == j->reol)
-                                       return match;
-                               s = p;
-                               break;
-                       case BOL:
-                               if(s == bol)
-                                       break;
-                               p = runestrchr(s, '\n');
-                               if(p == 0 || s == j->reol)
-                                       return match;
-                               s = p+1;
-                               break;
-                       }
-               }
+       if(msize > NSUBEXPM)
+               msize = NSUBEXPM;
 
-               r = *s;
+       if(prog->startinst->gen != 0) {
+               for(curinst = prog->startinst; curinst < prog->startinst + prog->len; curinst++)
+                       curinst->gen = 0;
+       }
 
-               /* switch run lists */
-               tl = j->relist[flag];
-               tle = j->reliste[flag];
-               nl = j->relist[flag^=1];
-               nle = j->reliste[flag];
-               nl->inst = 0;
+       clist = lists;
+       clist->head = nil;
+       clist->tail = &clist->head;
+       nlist = lists + 1;
+       nlist->head = nil;
+       nlist->tail = &nlist->head;
 
-               /* Add first instruction to current list */
-               _rrenewemptythread(tl, progp->startinst, ms, s);
+       pooltop = prog->threads + prog->nthr;
+       avail = nil;
 
-               /* Execute machine until current list is empty */
-               for(tlp=tl; tlp->inst; tlp++){
-                       for(inst=tlp->inst; ; inst = inst->next){
-                               switch(inst->type){
-                               case RUNE:      /* regular character */
-                                       if(inst->r == r)
-                                               if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case LBRA:
-                                       tlp->se.m[inst->subid].rsp = s;
-                                       continue;
-                               case RBRA:
-                                       tlp->se.m[inst->subid].rep = s;
-                                       continue;
-                               case ANY:
-                                       if(r != '\n')
-                                               if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case ANYNL:
-                                       if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case BOL:
-                                       if(s == bol || *(s-1) == '\n')
-                                               continue;
-                                       break;
-                               case EOL:
-                                       if(s == j->reol || r == 0 || r == '\n')
-                                               continue;
-                                       break;
-                               case CCLASS:
-                                       ep = inst->cp->end;
-                                       for(rp = inst->cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1]){
-                                                       if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                               return -1;
-                                                       break;
-                                               }
-                                       break;
-                               case NCCLASS:
-                                       ep = inst->cp->end;
-                                       for(rp = inst->cp->spans; rp < ep; rp += 2)
-                                               if(r >= rp[0] && r <= rp[1])
-                                                       break;
-                                       if(rp == ep)
-                                               if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
-                                                       return -1;
-                                       break;
-                               case OR:
-                                       /* evaluate right choice later */
-                                       if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
-                                               return -1;
-                                       /* efficiency: advance and re-evaluate */
-                                       continue;
-                               case END:       /* Match! */
-                                       match = 1;
-                                       tlp->se.m[0].rep = s;
-                                       if(mp != 0)
-                                               _renewmatch(mp, ms, &tlp->se);
-                                       break;
+       pri = matchpri = gen = match = 0;
+       rsp = str;
+       rep = nil;
+       endr = L'\0';
+       if(sem != nil && msize > 0) {
+               if(sem->rsp != nil)
+                       rsp = sem->rsp;
+               if(sem->rep != nil && *sem->rep != L'\0') {
+                       rep = sem->rep;
+                       endr = *sem->rep;
+                       *sem->rep = '\0';
+               }
+       }
+       last = 1;
+       for(; last != L'\0'; rsp++) {
+               gen++;
+               last = *rsp;
+               first = 1;
+               t = clist->head;
+               if(t == nil)
+                       goto Start;
+               curinst = t->pc;
+Again:
+               if(curinst->gen == gen)
+                       goto Done;
+               curinst->gen = gen;
+               switch(curinst->op) {
+               case ORUNE:
+                       if(*rsp != curinst->r)
+                               goto Done;
+               case OANY: /* fallthrough */
+               Any:
+                       next = t->next;
+                       t->pc = curinst + 1;
+                       t->next = nil;
+                       *nlist->tail = t;
+                       nlist->tail = &t->next;
+                       if(next == nil)
+                               break;
+                       t = next;
+                       curinst = t->pc;
+                       goto Again;
+               case OCLASS:
+               Class:
+                       if(*rsp < curinst->r)
+                               goto Done;
+                       if(*rsp > curinst->r1) {
+                               curinst++;
+                               goto Class;
+                       }
+                       next = t->next;
+                       t->pc = curinst->a;
+                       t->next = nil;
+                       *nlist->tail = t;
+                       nlist->tail = &t->next;
+                       if(next == nil)
+                               break;
+                       t = next;
+                       curinst = t->pc;
+                       goto Again;
+               case ONOTNL:
+                       if(*rsp != L'\n') {
+                               curinst++;
+                               goto Again;
+                       }
+                       goto Done;
+               case OBOL:
+                       if(rsp == str || rsp[-1] == L'\n') {
+                               curinst++;
+                               goto Again;
+                       }
+                       goto Done;
+               case OEOL:
+                       if(*rsp == L'\0' && rep == nil) {
+                               curinst++;
+                               goto Again;
+                       }
+                       if(*rsp == '\n')
+                               goto Any;
+                       goto Done;
+               case OJMP:
+                       curinst = curinst->a;
+                       goto Again;
+               case OSPLIT:
+                       if(avail == nil)
+                               next = --pooltop;
+                       else {
+                               next = avail;
+                               avail = avail->next;
+                       }
+                       next->pc = curinst->b;
+                       if(msize > 0)
+                               memcpy(next->sem, t->sem, sizeof(Resub)*msize);
+                       next->pri = t->pri;
+                       next->next = t->next;
+                       t->next = next;
+                       curinst = curinst->a;
+                       goto Again;
+               case OSAVE:
+                       if(curinst->sub < msize)
+                               t->sem[curinst->sub].rsp = rsp;
+                       curinst++;
+                       goto Again;
+               case OUNSAVE:
+                       if(curinst->sub == 0) {
+                               /* "Highest" priority is the left-most longest. */
+                               if (t->pri > matchpri)
+                                       goto Done;
+                               match = 1;
+                               matchpri = t->pri;
+                               if(sem != nil && msize > 0) {
+                                       memcpy(sem, t->sem, sizeof(Resub)*msize);
+                                       sem->rep = rsp;
                                }
+                               goto Done;
+                       }
+                       if(curinst->sub < msize)
+                               t->sem[curinst->sub].rep = rsp;
+                       curinst++;
+                       goto Again;
+               Done:
+                       next = t->next;
+                       t->next = avail;
+                       avail = t;
+                       if(next == nil)
                                break;
+                       t = next;
+                       curinst = t->pc;
+                       goto Again;
+               }
+Start:
+               /* Start again once if we haven't found anything. */
+               if(first == 1 && match == 0) {
+                       first = 0;
+                       if(avail == nil)
+                               t = --pooltop;
+                       else {
+                               t = avail;
+                               avail = avail->next;
                        }
+                       if(msize > 0)
+                               memset(t->sem, 0, sizeof(Resub)*msize);
+                       /* "Lower" priority thread */
+                       t->pri = matchpri = pri++;
+                       t->next = nil;
+                       curinst = prog->startinst;
+                       goto Again;
                }
-               if(s == j->reol)
+               /* If we have a match and no extant threads, we are done. */
+               if(match == 1 && nlist->head == nil)
                        break;
-               checkstart = j->startchar && nl->inst==0;
-               s++;
-       }while(r);
-       return match;
-}
-
-static int
-rregexec2(Reprog *progp,       /* program to run */
-       Rune *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms,         /* number of elements at mp */
-       Reljunk *j
-)
-{
-       Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
-
-       /* mark space */
-       j->relist[0] = relist0;
-       j->relist[1] = relist1;
-       j->reliste[0] = relist0 + nelem(relist0) - 2;
-       j->reliste[1] = relist1 + nelem(relist1) - 2;
-
-       return rregexec1(progp, bol, mp, ms, j);
-}
-
-extern int
-rregexec(Reprog *progp,        /* program to run */
-       Rune *bol,      /* string to run machine on */
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements at mp */
-{
-       Reljunk j;
-       Relist relist0[LISTSIZE], relist1[LISTSIZE];
-       int rv;
-
-       /*
-        *  use user-specified starting/ending location if specified
-        */
-       j.rstarts = bol;
-       j.reol = 0;
-       if(mp && ms>0){
-               if(mp->sp)
-                       j.rstarts = mp->rsp;
-               if(mp->ep)
-                       j.reol = mp->rep;
+               tmp = clist;
+               clist = nlist;
+               nlist = tmp;
+               nlist->head = nil;
+               nlist->tail = &nlist->head;
        }
-       j.starttype = 0;
-       j.startchar = 0;
-       if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
-               j.starttype = RUNE;
-               j.startchar = progp->startinst->r;
-       }
-       if(progp->startinst->type == BOL)
-               j.starttype = BOL;
-
-       /* mark space */
-       j.relist[0] = relist0;
-       j.relist[1] = relist1;
-       j.reliste[0] = relist0 + nelem(relist0) - 2;
-       j.reliste[1] = relist1 + nelem(relist1) - 2;
-
-       rv = rregexec1(progp, bol, mp, ms, &j);
-       if(rv >= 0)
-               return rv;
-       rv = rregexec2(progp, bol, mp, ms, &j);
-       if(rv >= 0)
-               return rv;
-       return -1;
+       if(rep != nil)
+               *rep = endr;
+       return match;
 }
index c395583d0fb0909c126ec7e5d59d5a443f54f684..88cbbfc893b99f5ffbbce2d25017fd66798fda5b 100644 (file)
@@ -1,64 +1,66 @@
 #include <u.h>
 #include <libc.h>
-#include "regexp.h"
+#include <regexp.h>
 
-/* substitute into one string using the matches from the last regexec() */
-extern void
-rregsub(Rune *sp,      /* source string */
-       Rune *dp,       /* destination string */
-       int dlen,
-       Resub *mp,      /* subexpression elements */
-       int ms)         /* number of elements pointed to by mp */
+void
+rregsub(Rune *src, Rune *dst, int dlen, Resub *match, int msize)
 {
-       Rune *ssp, *ep;
        int i;
+       Rune *ep, r;
 
-       ep = dp+(dlen/sizeof(Rune))-1;
-       while(*sp != '\0'){
-               if(*sp == '\\'){
-                       switch(*++sp){
-                       case '0':
-                       case '1':
-                       case '2':
-                       case '3':
-                       case '4':
-                       case '5':
-                       case '6':
-                       case '7':
-                       case '8':
-                       case '9':
-                               i = *sp-'0';
-                               if(mp[i].rsp != 0 && mp!=0 && ms>i)
-                                       for(ssp = mp[i].rsp;
-                                            ssp < mp[i].rep;
-                                            ssp++)
-                                               if(dp < ep)
-                                                       *dp++ = *ssp;
-                               break;
-                       case '\\':
-                               if(dp < ep)
-                                       *dp++ = '\\';
-                               break;
-                       case '\0':
-                               sp--;
-                               break;
-                       default:
-                               if(dp < ep)
-                                       *dp++ = *sp;
-                               break;
+       ep = dst + dlen-1;
+       for(;*src != L'\0'; src++) switch(*src) {
+       case L'\\':
+               switch(*++src) {
+               case L'0':
+               case L'1':
+               case L'2':
+               case L'3':
+               case L'4':
+               case L'5':
+               case L'6':
+               case L'7':
+               case L'8':
+               case L'9':
+                       i = *src - L'0';
+                       if(match != nil && i < msize && match[i].rsp != nil) {
+                               r = *match[i].rep;
+                               *match[i].rep = L'\0';
+                               dst = runestrecpy(dst, ep+1, match[i].rsp);
+                               *match[i].rep = r;
                        }
-               }else if(*sp == '&'){                           
-                       if(mp[0].rsp != 0 && mp!=0 && ms>0)
-                       if(mp[0].rsp != 0)
-                               for(ssp = mp[0].rsp;
-                                    ssp < mp[0].rep; ssp++)
-                                       if(dp < ep)
-                                               *dp++ = *ssp;
-               }else{
-                       if(dp < ep)
-                               *dp++ = *sp;
+                       break;
+               case L'\\':
+                       if(dst < ep)
+                               *dst++ = L'\\';
+                       else
+                               goto End;
+                       break;
+               case L'\0':
+                       goto End;
+               default:
+                       if(dst < ep)
+                               *dst++ = *src;
+                       else
+                               goto End;
+                       break;
                }
-               sp++;
+               break;
+       case L'&':
+               if(match != nil && msize > 0 && match[0].rsp != nil) {
+                       r = *match[0].rep;
+                       *match[0].rep = L'\0';
+                       dst = runestrecpy(dst, ep+1, match[0].rsp);
+                       *match[0].rep = r;
+               }
+               break;
+       default:
+               if(dst < ep)
+                       *dst++ = *src;
+               else
+                       goto End;
+               break;
        }
-       *dp = '\0';
+End:
+       *dst = L'\0';
 }