10 DEPTH = 20, /* max nesting depth of {} */
11 MAXCMDS = 512, /* max sed commands */
12 ADDSIZE = 10000, /* size of add & read buffer */
13 MAXADDS = 20, /* max pending adds and reads */
14 LBSIZE = 8192, /* input line size */
15 LABSIZE = 50, /* max number of labels */
16 MAXSUB = 10, /* max number of sub reg exp */
17 MAXFILES = 120, /* max output files */
21 * An address is a line #, a R.E., "$", a reference to the last
33 long line; /* Line # */
34 Reprog *rp; /* Compiled R.E. */
38 typedef struct SEDCOM {
39 Addr ad1; /* optional start address */
40 Addr ad2; /* optional end address */
42 Reprog *re1; /* compiled R.E. */
43 Rune *text; /* added text or file name */
44 struct SEDCOM *lb1; /* destination command of branch */
46 Rune *rhs; /* Right-hand side of substitution */
47 Biobuf* fcode; /* File ID for read and write */
48 char command; /* command code -see below */
49 char gfl; /* 'Global' flag for substitutions */
50 char pfl; /* 'print' flag for substitutions */
51 char active; /* 1 => data between start and end */
52 char negfl; /* negation flag */
55 /* Command Codes for field SedCom.command */
84 typedef struct label { /* Label symbol table */
85 Rune uninm[9]; /* Label name */
87 SedCom *address; /* Command associated with label */
90 typedef struct FILE_CACHE { /* Data file control block */
91 struct FILE_CACHE *next; /* Forward Link */
92 char *name; /* Name of file */
95 SedCom pspace[MAXCMDS]; /* Command storage: fixed array of MAXCMDS compiled commands */
96 SedCom *pend = pspace+MAXCMDS; /* End of command storage (one past the last slot) */
97 SedCom *rep = pspace; /* Current fill point; starts at the first slot of pspace */
99 int dollars; /* Number of dollar (first) addresses; when nonzero the input reader peeks one rune ahead */
101 Reprog *lastre; /* Last regular expression compiled; substituted for A_LAST addresses */
102 Resub subexp[MAXSUB]; /* Sub-patterns of pattern match; entry 0 supplies loc1/loc2, others serve back references */
104 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands; r text and s right-hand sides are stored here too */
105 Rune *addend = addspace+ADDSIZE; /* End of addspace (one past the last Rune) */
107 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
108 SedCom **aptr = abuf; /* Cursor into abuf; checked against abuf+MAXADDS before another entry is queued */
110 struct { /* Sed program input control block */
111 enum PTYPE { /* Either on command line or in file */
115 union PCTL { /* Pointer to data */
121 Rune genbuf[LBSIZE+1]; /* Miscellaneous buffer; substitution output is bounded by it ("Output line too long") */
123 FileCache *fhead; /* Head of File Cache Chain; the next input is opened from fhead->name, then fhead advances */
124 FileCache *ftail; /* Tail of File Cache Chain */
126 Rune *loc1; /* Start of pattern match (taken from subexp[0].rsp) */
127 Rune *loc2; /* End of pattern match (taken from subexp[0].rep) */
128 Rune seof; /* Pattern delimiter char; scanning of a pattern stops when it is seen */
130 Rune linebuf[LBSIZE+1]; /* Input data buffer; also receives each line of the sed program while it is compiled */
131 Rune *lbend = linebuf+LBSIZE; /* End of buffer: the last element of the LBSIZE+1 array */
132 Rune *spend = linebuf; /* End of input data; set from gline(linebuf) for each input line */
133 Rune *cp; /* Current scan point in linebuf */
135 Rune holdsp[LBSIZE+1]; /* Hold buffer */
136 Rune *hend = holdsp+LBSIZE; /* End of hold buffer: the last element of the LBSIZE+1 array */
137 Rune *hspend = holdsp; /* End of hold data; initially the hold buffer is empty */
139 int nflag; /* Command line flags; when nflag is set the line is not written automatically after the commands run */
143 int dolflag; /* Set when at true EOF */
144 int sflag; /* Set when substitution done */
145 int jflag; /* Set when jump required */
146 int delflag; /* Delete current line when set; also suppresses the automatic output of the line */
148 long lnum; /* Input line count; compared against line-number addresses */
150 char fname[MAXFILES][40]; /* File name cache, 40 bytes per name; fname[nfiles] holds the name being looked up */
151 Biobuf *fcode[MAXFILES]; /* File ID cache, parallel to fname; main registers &fout in it first */
152 int nfiles; /* Cache fill point */
154 Biobuf fout; /* Output stream; initialised on fd 1 for writing */
155 Biobuf stdin; /* Default input; initialised on fd 0 for reading */
156 Biobuf* f; /* Input data; 0 until a data stream has been opened */
158 Label ltab[LABSIZE]; /* Label name symbol table; ltab[0] serves branches with no label (jump to end) */
159 Label *labend = ltab+LABSIZE; /* End of label table (one past the last entry) */
160 Label *lab = ltab+1; /* Current Fill point; starts after ltab[0] */
162 int depth; /* {} stack pointer; indexes the cmpend[] stack while compiling */
164 Rune bad; /* Dummy err ptr reference */
168 char CGMES[] = "%S command garbled: %S"; /* quit() format; args: command name, offending line (both Rune strings) */
169 char TMMES[] = "Too much text: %S"; /* quit() format; arg: offending line */
170 char LTL[] = "Label too long: %S"; /* format with one %S argument; presumably the offending line - use not visible here */
171 char AD0MES[] = "No addresses allowed: %S"; /* quit() format; arg: offending line */
172 char AD1MES[] = "Only one address allowed: %S"; /* quit() format; arg: offending line */
174 void address(Addr *); /* parse one address ("$", R.E. or line number) at cp */
176 int cmp(char *, char *); /* compare characters; 0 means equal */
177 int rcmp(Rune *, Rune *); /* compare runes; 0 means equal */
178 void command(SedCom *); /* execute one command, selected by its command code */
179 Reprog *compile(void); /* compile the R.E. at cp; the result is also recorded in lastre */
180 Rune *compsub(Rune *, Rune *); /* build the right-hand side of a substitution; callers treat 0 as failure */
185 int executable(SedCom *); /* determine if a statement should be applied to an input line */
190 int match(Reprog *, Rune *); /* execute the R.E.; on success sets loc1 and loc2 */
191 void newfile(enum PTYPE, char *); /* add a sed program source: argument text (P_ARG) or file (P_FILE) */
193 Biobuf *open_file(char *); /* open or create an output file and record it in fcode[] */
194 Rune *place(Rune *, Rune *, Rune *); /* presumably copies l1..l2 to sp; quits if genbuf would overflow */
195 void quit(char *, ...); /* format a message prefixed with "sed: " and write it to fd 2 */
196 int rline(Rune *, Rune *); /* read one line of the sed program into buf; negative when no more */
197 Label *search(Label *); /* look through earlier ltab entries for the same label name */
198 int substitute(SedCom *); /* apply a substitution to linebuf; loops when the global flag is set */
200 Rune *stext(Rune *, Rune *); /* extract rune string */
203 void putline(Biobuf *bp, Rune *buf, int n); /* write runes from buf to bp; presumably n of them */
206 main(int argc, char **argv)
211 Binit(&fout, 1, OWRITE);
213 fcode[nfiles++] = &fout;
221 quit("missing pattern");
222 newfile(P_ARG, ARGF());
228 quit("no pattern-file");
229 newfile(P_FILE, ARGF());
242 case 'E': case 'r': /* unix compat */
245 quit("Unknown flag: %c", ARGC());
250 quit("missing pattern");
251 newfile(P_ARG, *argv++);
256 quit("Too many {'s");
258 ltab[0].address = rep;
263 enroll(nil); /* Add stdin to cache */
278 static Rune *p = addspace;
279 static SedCom **cmpend[DEPTH]; /* stack of {} operations */
281 while (rline(linebuf, lbend) >= 0) {
284 while(*cp == L' ' || *cp == L'\t')
286 if(*cp == L'\0' || *cp == L'#')
294 if (rep->ad1.type != A_NONE) {
295 if (rep->ad1.type == A_DOL)
297 if (rep->ad1.type == A_LAST) {
299 quit("First RE may not be null");
300 rep->ad1.type = A_RE;
301 rep->ad1.rp = lastre;
303 if(*cp == L',' || *cp == L';') {
306 if (rep->ad2.type == A_LAST) {
307 rep->ad2.type = A_RE;
308 rep->ad2.rp = lastre;
311 rep->ad2.type = A_NONE;
314 while(*cp == L' ' || *cp == L'\t')
319 quit("Unrecognized command: %S", linebuf);
327 rep->negfl = !rep->negfl;
328 cmpend[depth++] = &rep->lb1;
330 quit("Too many commands: %S", linebuf);
336 if(rep->ad1.type != A_NONE)
337 quit(AD0MES, linebuf);
339 quit("Too many }'s");
340 *cmpend[depth] = rep;
346 rep->command = EQCOM;
347 if(rep->ad2.type != A_NONE)
348 quit(AD1MES, linebuf);
352 if(rep->ad1.type != A_NONE)
353 quit(AD0MES, linebuf);
358 while (*cp && *cp != L';' && *cp != L' ' &&
359 *cp != L'\t' && *cp != L'#') {
361 if(tp >= &lab->uninm[8])
366 if (*lab->uninm == L'\0') /* no label? */
367 quit(CGMES, L":", linebuf);
368 if(lpt = search(lab)) {
370 quit("Duplicate labels: %S", linebuf);
375 quit("Too many labels: %S", linebuf);
380 rep--; /* reuse this slot */
385 if(rep->ad2.type != A_NONE)
386 quit(AD1MES, linebuf);
390 quit(CGMES, L"a", linebuf);
392 p = stext(p, addend);
399 quit(CGMES, L"c", linebuf);
401 p = stext(p, addend);
405 if(rep->ad2.type != A_NONE)
406 quit(AD1MES, linebuf);
410 quit(CGMES, L"i", linebuf);
412 p = stext(p, addend);
420 rep->command = CGCOM;
428 rep->command = CHCOM;
440 if(*cp == L'\0' || *cp == L';') {
441 /* no label; jump to end */
442 if(pt = ltab[0].chain) {
443 while((pt1 = pt->lb1) != nil)
451 /* copy label into lab->uninm */
453 while((*tp = *cp++) != L'\0' && *tp != L';')
454 if(++tp >= &lab->uninm[8])
459 if (*lab->uninm == L'\0')
460 /* shouldn't get here */
461 quit(CGMES, L"b or t", linebuf);
462 if((lpt = search(lab)) != nil) {
464 rep->lb1 = lpt->address;
466 for(pt = lpt->chain; pt != nil &&
467 (pt1 = pt->lb1) != nil; pt = pt1)
472 } else { /* add new label */
476 quit("Too many labels: %S", linebuf);
485 rep->command = CNCOM;
493 rep->command = CPCOM;
498 if(rep->ad2.type != A_NONE)
499 quit(AD1MES, linebuf);
501 quit(CGMES, L"r", linebuf);
503 p = stext(p, addend);
511 rep->command = CDCOM;
517 if(rep->ad2.type != A_NONE)
518 quit(AD1MES, linebuf);
528 if ((rep->re1 = compile()) == 0) {
530 quit("First RE may not be null");
534 if((p = compsub(p, addend)) == 0)
535 quit(CGMES, L"s", linebuf);
555 quit(CGMES, L"s", linebuf);
557 for(i = nfiles - 1; i >= 0; i--)
558 if(cmp(fname[nfiles], fname[i]) == 0) {
559 rep->fcode = fcode[i];
562 if(nfiles >= MAXFILES)
563 quit("Too many files in w commands 1");
564 rep->fcode = open_file(fname[nfiles]);
571 quit(CGMES, L"w", linebuf);
573 for(i = nfiles - 1; i >= 0; i--)
574 if(cmp(fname[nfiles], fname[i]) == 0) {
575 rep->fcode = fcode[i];
578 if(nfiles >= MAXFILES){
579 fprint(2, "sed: Too many files in w commands 2 \n");
580 fprint(2, "nfiles = %d; MAXF = %d\n",
584 rep->fcode = open_file(fname[nfiles]);
595 quit(CGMES, L"y", linebuf);
601 quit("Too many commands, last: %S", linebuf);
605 quit(CGMES, cp - 1, linebuf);
611 open_file(char *name)
616 if ((bp = malloc(sizeof(Biobuf))) == 0)
617 quit("Out of memory");
618 if ((fd = open(name, OWRITE)) < 0 &&
619 (fd = create(name, OWRITE, 0666)) < 0)
620 quit("Cannot create %s", name);
621 Binit(bp, fd, OWRITE);
624 fcode[nfiles++] = bp;
629 compsub(Rune *rhs, Rune *end)
633 while ((r = *cp++) != '\0') {
666 if((c = *cp++) == seof) /* L'//' */
670 if (c == L'\0' || c == L'\n')
671 quit(TMMES, linebuf);
673 if (ep >= expbuf+sizeof(expbuf))
674 quit(TMMES, linebuf);
675 ep += runetochar(ep, &c);
676 if ((c = *cp++) == L'n')
679 if (ep >= expbuf + sizeof(expbuf))
680 quit(TMMES, linebuf);
681 ep += runetochar(ep, &c);
682 } while ((c = *cp++) != seof);
684 return lastre = regcomp(expbuf);
691 quit(CGMES, L"r.e.-using", linebuf);
695 flushout(Biobufhdr *bp, void *v, long n)
699 for(i = 0; i < nfiles; i++)
701 return read(bp->fid, v, n);
705 newfile(enum PTYPE type, char *name)
710 if ((prog.bp = Bopen(name, OREAD)) == 0)
711 quit("Cannot open pattern-file: %s\n", name);
712 Blethal(prog.bp, nil);
713 if(uflag) Biofn(prog.bp, flushout);
719 rline(Rune *buf, Rune *end)
724 while ((c = getrune()) >= 0) {
729 if ((c = getrune()) < 0)
732 } else if (r == '\n') {
750 if (prog.type == P_ARG) {
751 if ((p = prog.curr) != 0) {
753 prog.curr += chartorune(&r, p);
756 c = '\n'; /* fake an end-of-line */
761 } else if ((c = Bgetrune(prog.bp)) < 0)
772 if((c = *cp++) == '$')
776 if (ap->rp = compile())
781 else if (c >= '0' && c <= '9') {
783 while ((c = *cp) >= '0' && c <= '9')
784 lno = lno*10 + *cp++ - '0';
786 quit("line number 0 is illegal",0);
796 cmp(char *a, char *b) /* compare characters */
805 rcmp(Rune *a, Rune *b) /* compare runes */
816 text(char *p) /* extract character string */
820 while(*cp == ' ' || *cp == '\t')
823 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
826 while (*cp == ' ' || *cp == '\t')
828 p += runetochar(p, &r);
835 stext(Rune *p, Rune *end) /* extract rune string */
837 while(*cp == L' ' || *cp == L'\t')
840 if (*cp == L'\\' && *++cp == L'\0')
843 quit(TMMES, linebuf);
844 if ((*p++ = *cp++) == L'\n')
845 while(*cp == L' ' || *cp == L'\t')
858 for (rp = ltab; rp < ptr; rp++)
859 if(rcmp(rp->uninm, ptr->uninm) == 0)
868 SedCom *rptr, *trptr;
870 for(lptr = ltab; lptr < lab; lptr++) {
871 if(lptr->address == 0)
872 quit("Undefined label: %S", lptr->uninm);
875 while((trptr = rptr->lb1) != nil) {
876 rptr->lb1 = lptr->address;
879 rptr->lb1 = lptr->address;
892 for(tsp = cp; *tsp != seof; tsp++) {
895 if(*tsp == L'\n' || *tsp == L'\0')
901 if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
902 quit("Out of memory");
903 *rp++ = highc; /* save upper bound */
904 for (i = 0; i <= highc; i++)
907 while((c = *sp++) != seof) {
908 if(c == L'\\' && *sp == L'n') {
912 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
916 if(rp[c] == seof || rp[c] == L'\0') {
936 while (spend = gline(linebuf)){
937 for(ipc = pspace; ipc->command; ) {
938 if (!executable(ipc)) {
948 if((ipc = ipc->lb1) == 0)
953 if(!nflag && !delflag)
954 putline(&fout, linebuf, spend - linebuf);
961 /* determine if a statement should be applied to an input line */
963 executable(SedCom *ipc)
965 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
966 if (ipc->active == 1) /* Second line */
968 switch(ipc->ad2.type) {
969 case A_NONE: /* No second addr; use first */
972 case A_DOL: /* Accept everything */
974 case A_LINE: /* Line at end of range? */
975 if (lnum <= ipc->ad2.line) {
976 if (ipc->ad2.line == lnum)
980 ipc->active = 0; /* out of range */
982 case A_RE: /* Check for matching R.E. */
983 if (match(ipc->ad2.rp, linebuf))
987 quit("Internal error");
990 switch (ipc->ad1.type) { /* Check first address */
991 case A_NONE: /* Everything matches */
993 case A_DOL: /* Only last line */
997 case A_LINE: /* Check line number */
998 if (ipc->ad1.line == lnum) {
999 ipc->active = 1; /* In range */
1003 case A_RE: /* Check R.E. */
1004 if (match(ipc->ad1.rp, linebuf)) {
1005 ipc->active = 1; /* In range */
1010 quit("Internal error");
1016 match(Reprog *pattern, Rune *buf)
1020 subexp[0].rsp = buf;
1022 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
1023 loc1 = subexp[0].rsp;
1024 loc2 = subexp[0].rep;
1032 substitute(SedCom *ipc)
1036 if(!match(ipc->re1, linebuf))
1040 * we have at least one match. some patterns, e.g. '$' or '^', can
1041 * produce 0-length matches, so during a global substitute we must
1042 * bump to the character after a 0-length match to keep from looping.
1045 if(ipc->gfl == 0) /* single substitution */
1048 do{ /* global substitution */
1049 len = loc2 - loc1; /* length of match */
1050 dosub(ipc->rhs); /* dosub moves loc2 */
1051 if(*loc2 == 0) /* end of string */
1053 if(len == 0) /* zero-length R.E. match */
1054 loc2++; /* bump over 0-length match */
1055 if(*loc2 == 0) /* end of string */
1057 } while(match(ipc->re1, loc2));
1074 sp = place(sp, loc1, loc2);
1077 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1079 if (subexp[n].rsp && subexp[n].rep) {
1080 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1084 quit("Invalid back reference \\%d", n);
1088 if (sp >= &genbuf[LBSIZE])
1089 quit("Output line too long");
1092 loc2 = sp - genbuf + linebuf;
1093 while (*sp++ = *lp++)
1094 if (sp >= &genbuf[LBSIZE])
1095 quit("Output line too long");
1098 while (*lp++ = *sp++)
1104 place(Rune *sp, Rune *l1, Rune *l2)
1108 if (sp >= &genbuf[LBSIZE])
1109 quit("Output line too long");
1117 static char buf[] = "\\x0000";
1118 static char hex[] = "0123456789abcdef";
1132 buf[2] = hex[(c>>12)&0xF];
1133 buf[3] = hex[(c>>8)&0xF];
1134 buf[4] = hex[(c>>4)&0xF];
1135 buf[5] = hex[c&0xF];
1140 command(SedCom *ipc)
1144 Rune *execp, *p1, *p2, *rp;
1146 switch(ipc->command) {
1149 if(aptr >= abuf+MAXADDS)
1150 quit("Too many appends after line %ld", lnum);
1155 if(ipc->active == 1) {
1156 for(rp = ipc->text; *rp; rp++)
1157 Bputrune(&fout, *rp);
1166 while(*p1 != '\n') {
1173 while(*p2++ = *p1++)
1179 Bprint(&fout, "%ld\n", lnum);
1184 while(*p1++ = *p2++)
1192 while(*p1++ = *p2++)
1200 while(*p1++ = *p2++);
1207 while(*p1++ = *p2++)
1213 for(rp = ipc->text; *rp; rp++)
1214 Bputrune(&fout, *rp);
1222 for (i = 0, rp = linebuf; *rp; rp++) {
1224 if(c >= 0x20 && c < 0x7F && c != '\\') {
1227 Bprint(&fout, "\\\n");
1231 for (ucp = trans(*rp); *ucp; ucp++){
1235 Bprint(&fout, "\\\n");
1242 Bprint(&fout, "\\n");
1247 putline(&fout, linebuf, spend-linebuf);
1251 if((execp = gline(linebuf)) == 0) {
1261 if((execp = gline(spend)) == 0) {
1268 putline(&fout, linebuf, spend-linebuf);
1272 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1278 putline(&fout, linebuf, spend-linebuf);
1284 if(aptr >= &abuf[MAXADDS])
1285 quit("Too many reads after line %ld", lnum);
1289 i = substitute(ipc);
1292 putline(&fout, linebuf, spend-linebuf);
1308 putline(ipc->fcode,linebuf, spend - linebuf);
1313 while(*p2++ = *p1++)
1317 while(*p2++ = *p1++)
1322 while(*p2++ = *p1++)
1329 for (i = *p2++; *p1; p1++)
1337 putline(Biobuf *bp, Rune *buf, int n)
1340 Bputrune(bp, *buf++);
1353 for (aptr = abuf; *aptr; aptr++) {
1354 if((*aptr)->command == ACOM) {
1355 for(p1 = (*aptr)->text; *p1; p1++ )
1356 Bputrune(&fout, *p1);
1359 for(s = buf, e = buf+sizeof(buf)-UTFmax-1, p1 = (*aptr)->text; *p1 && s < e; p1++)
1360 s += runetochar(s, p1);
1362 if((fi = Bopen(buf, OREAD)) == 0)
1365 if(uflag) Biofn(fi, flushout);
1366 while((c = Bgetc(fi)) >= 0)
1382 quit(char *fmt, ...)
1388 ep = msg + sizeof msg;
1389 p = seprint(msg, ep, "sed: ");
1391 p = vseprint(p, ep, fmt, arg);
1393 p = seprint(p, ep, "\n");
1394 write(2, msg, p - msg);
1403 static long peekc = 0;
1405 if (f == 0 && opendata() < 0)
1409 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1412 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1414 if (dollars != 0 && (peekc = Bgetrune(f)) < 0 && fhead == nil)
1422 /* return partial final line, adding implicit newline */
1432 } while (opendata() > 0); /* Switch to next stream */
1438 * Data file input section - the intent is to transparently
1439 * catenate all data input streams.
1442 enroll(char *filename) /* Add a file to the input file cache */
1446 if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1447 quit("Out of memory");
1454 fp->name = filename; /* 0 => stdin */
1463 if ((f = Bopen(fhead->name, OREAD)) == nil)
1464 quit("Can't open %s", fhead->name);
1466 Binit(&stdin, 0, OREAD);
1470 if(uflag) Biofn(f, flushout);
1471 fhead = fhead->next;