]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/sed.c
merge
[plan9front.git] / sys / src / cmd / sed.c
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
9 enum {
10         DEPTH           = 20,           /* max nesting depth of {} */
11         MAXCMDS         = 512,          /* max sed commands */
12         ADDSIZE         = 10000,        /* size of add & read buffer */
13         MAXADDS         = 20,           /* max pending adds and reads */
14         LBSIZE          = 8192,         /* input line size */
15         LABSIZE         = 50,           /* max number of labels */
16         MAXSUB          = 10,           /* max number of sub reg exp */
17         MAXFILES        = 120,          /* max output files */
18 };
19
20 /*
21  * An address is a line #, a R.E., "$", a reference to the last
22  * R.E., or nothing.
23  */
24 typedef struct {
25         enum {
26                 A_NONE,
27                 A_DOL,
28                 A_LINE,
29                 A_RE,
30                 A_LAST,
31         }type;
32         union {
33                 long    line;           /* Line # */
34                 Reprog  *rp;            /* Compiled R.E. */
35         };
36 } Addr;
37
38 typedef struct  SEDCOM {
39         Addr    ad1;                    /* optional start address */
40         Addr    ad2;                    /* optional end address */
41         union {
42                 Reprog  *re1;           /* compiled R.E. */
43                 Rune    *text;          /* added text or file name */
44                 struct  SEDCOM  *lb1;   /* destination command of branch */
45         };
46         Rune    *rhs;                   /* Right-hand side of substitution */
47         Biobuf* fcode;                  /* File ID for read and write */
48         char    command;                /* command code -see below */
49         char    gfl;                    /* 'Global' flag for substitutions */
50         char    pfl;                    /* 'print' flag for substitutions */
51         char    active;                 /* 1 => data between start and end */
52         char    negfl;                  /* negation flag */
53 } SedCom;
54
55 /* Command Codes for field SedCom.command */
56 #define ACOM    01
57 #define BCOM    020
58 #define CCOM    02
59 #define CDCOM   025
60 #define CNCOM   022
61 #define COCOM   017
62 #define CPCOM   023
63 #define DCOM    03
64 #define ECOM    015
65 #define EQCOM   013
66 #define FCOM    016
67 #define GCOM    027
68 #define CGCOM   030
69 #define HCOM    031
70 #define CHCOM   032
71 #define ICOM    04
72 #define LCOM    05
73 #define NCOM    012
74 #define PCOM    010
75 #define QCOM    011
76 #define RCOM    06
77 #define SCOM    07
78 #define TCOM    021
79 #define WCOM    014
80 #define CWCOM   024
81 #define YCOM    026
82 #define XCOM    033
83
84 typedef struct label {                  /* Label symbol table */
85         Rune    uninm[9];               /* Label name */
86         SedCom  *chain;
87         SedCom  *address;               /* Command associated with label */
88 } Label;
89
90 typedef struct  FILE_CACHE {            /* Data file control block */
91         struct FILE_CACHE *next;        /* Forward Link */
92         char    *name;                  /* Name of file */
93 } FileCache;
94
95 SedCom pspace[MAXCMDS];                 /* Command storage */
96 SedCom *pend = pspace+MAXCMDS;          /* End of command storage */
97 SedCom *rep = pspace;                   /* Current fill point */
98
99 int     dollars;                        /* Number of dollar addresses */
100
101 Reprog  *lastre;                        /* Last regular expression */
102 Resub   subexp[MAXSUB];                 /* sub-patterns of pattern match*/
103
104 Rune    addspace[ADDSIZE];              /* Buffer for a, c, & i commands */
105 Rune    *addend = addspace+ADDSIZE;
106
107 SedCom  *abuf[MAXADDS];                 /* Queue of pending adds & reads */
108 SedCom  **aptr = abuf;
109
110 struct {                                /* Sed program input control block */
111         enum PTYPE {                    /* Either on command line or in file */
112                 P_ARG,
113                 P_FILE,
114         } type;
115         union PCTL {                    /* Pointer to data */
116                 Biobuf  *bp;
117                 char    *curr;
118         };
119 } prog;
120
121 Rune    genbuf[LBSIZE+1];               /* Miscellaneous buffer */
122
123 FileCache       *fhead;                 /* Head of File Cache Chain */
124 FileCache       *ftail;                 /* Tail of File Cache Chain */
125
126 Rune    *loc1;                          /* Start of pattern match */
127 Rune    *loc2;                          /* End of pattern match */
128 Rune    seof;                           /* Pattern delimiter char */
129
130 Rune    linebuf[LBSIZE+1];              /* Input data buffer */
131 Rune    *lbend = linebuf+LBSIZE;        /* End of buffer */
132 Rune    *spend = linebuf;               /* End of input data */
133 Rune    *cp;                            /* Current scan point in linebuf */
134
135 Rune    holdsp[LBSIZE+1];               /* Hold buffer */
136 Rune    *hend = holdsp+LBSIZE;          /* End of hold buffer */
137 Rune    *hspend = holdsp;               /* End of hold data */
138
139 int     nflag;                          /* Command line flags */
140 int     gflag;
141 int     uflag;
142
143 int     dolflag;                        /* Set when at true EOF */
144 int     sflag;                          /* Set when substitution done */
145 int     jflag;                          /* Set when jump required */
146 int     delflag;                        /* Delete current line when set */
147
148 long    lnum;                           /* Input line count */
149
150 char    fname[MAXFILES][40];            /* File name cache */
151 Biobuf  *fcode[MAXFILES];               /* File ID cache */
152 int     nfiles;                         /* Cache fill point */
153
154 Biobuf  fout;                           /* Output stream */
155 Biobuf  stdin;                          /* Default input */
156 Biobuf* f;                              /* Input data */
157
158 Label   ltab[LABSIZE];                  /* Label name symbol table */
159 Label   *labend = ltab+LABSIZE;         /* End of label table */
160 Label   *lab = ltab+1;                  /* Current Fill point */
161
162 int     depth;                          /* {} stack pointer */
163
164 Rune    bad;                            /* Dummy err ptr reference */
165 Rune    *badp = &bad;
166
167
168 char    CGMES[]  =      "%S command garbled: %S";
169 char    TMMES[]  =      "Too much text: %S";
170 char    LTL[]    =      "Label too long: %S";
171 char    AD0MES[] =      "No addresses allowed: %S";
172 char    AD1MES[] =      "Only one address allowed: %S";
173
174 void    address(Addr *);
175 void    arout(void);
176 int     cmp(char *, char *);
177 int     rcmp(Rune *, Rune *);
178 void    command(SedCom *);
179 Reprog  *compile(void);
180 Rune    *compsub(Rune *, Rune *);
181 void    dechain(void);
182 void    dosub(Rune *);
183 void    enroll(char *);
184 void    errexit(void);
185 int     executable(SedCom *);
186 void    execute(void);
187 void    fcomp(void);
188 long    getrune(void);
189 Rune    *gline(Rune *);
190 int     match(Reprog *, Rune *);
191 void    newfile(enum PTYPE, char *);
192 int     opendata(void);
193 Biobuf  *open_file(char *);
194 Rune    *place(Rune *, Rune *, Rune *);
195 void    quit(char *, ...);
196 int     rline(Rune *, Rune *);
197 Label   *search(Label *);
198 int     substitute(SedCom *);
199 char    *text(char *);
200 Rune    *stext(Rune *, Rune *);
201 int     ycomp(SedCom *);
202 char *  trans(int c);
203 void    putline(Biobuf *bp, Rune *buf, int n);
204
205 void
206 main(int argc, char **argv)
207 {
208         int compfl;
209
210         lnum = 0;
211         Binit(&fout, 1, OWRITE);
212         Blethal(&fout, nil);
213         fcode[nfiles++] = &fout;
214         compfl = 0;
215
216         if(argc == 1)
217                 exits(nil);
218         ARGBEGIN{
219         case 'e':
220                 if (argc <= 1)
221                         quit("missing pattern");
222                 newfile(P_ARG, ARGF());
223                 fcomp();
224                 compfl = 1;
225                 continue;
226         case 'f':
227                 if(argc <= 1)
228                         quit("no pattern-file");
229                 newfile(P_FILE, ARGF());
230                 fcomp();
231                 compfl = 1;
232                 continue;
233         case 'g':
234                 gflag++;
235                 continue;
236         case 'n':
237                 nflag++;
238                 continue;
239         case 'u':
240                 uflag++;
241                 continue;
242         default:
243                 quit("Unknown flag: %c", ARGC());
244         } ARGEND
245
246         if(compfl == 0) {
247                 if (--argc < 0)
248                         quit("missing pattern");
249                 newfile(P_ARG, *argv++);
250                 fcomp();
251         }
252
253         if(depth)
254                 quit("Too many {'s");
255
256         ltab[0].address = rep;
257
258         dechain();
259
260         if(argc <= 0)
261                 enroll(nil);            /* Add stdin to cache */
262         else
263                 while(--argc >= 0)
264                         enroll(*argv++);
265         execute();
266         exits(nil);
267 }
268
269 void
270 fcomp(void)
271 {
272         int     i;
273         Label   *lpt;
274         Rune    *tp;
275         SedCom  *pt, *pt1;
276         static Rune     *p = addspace;
277         static SedCom   **cmpend[DEPTH];        /* stack of {} operations */
278
279         while (rline(linebuf, lbend) >= 0) {
280                 cp = linebuf;
281 comploop:
282                 while(*cp == L' ' || *cp == L'\t')
283                         cp++;
284                 if(*cp == L'\0' || *cp == L'#')
285                         continue;
286                 if(*cp == L';') {
287                         cp++;
288                         goto comploop;
289                 }
290
291                 address(&rep->ad1);
292                 if (rep->ad1.type != A_NONE) {
293                         if (rep->ad1.type == A_LAST) {
294                                 if (!lastre)
295                                         quit("First RE may not be null");
296                                 rep->ad1.type = A_RE;
297                                 rep->ad1.rp = lastre;
298                         }
299                         if(*cp == L',' || *cp == L';') {
300                                 cp++;
301                                 address(&rep->ad2);
302                                 if (rep->ad2.type == A_LAST) {
303                                         rep->ad2.type = A_RE;
304                                         rep->ad2.rp = lastre;
305                                 }
306                         } else
307                                 rep->ad2.type = A_NONE;
308                 }
309                 while(*cp == L' ' || *cp == L'\t')
310                         cp++;
311
312 swit:
313                 switch(*cp++) {
314                 default:
315                         quit("Unrecognized command: %S", linebuf);
316
317                 case '!':
318                         rep->negfl = 1;
319                         goto swit;
320
321                 case '{':
322                         rep->command = BCOM;
323                         rep->negfl = !rep->negfl;
324                         cmpend[depth++] = &rep->lb1;
325                         if(++rep >= pend)
326                                 quit("Too many commands: %S", linebuf);
327                         if(*cp == '\0')
328                                 continue;
329                         goto comploop;
330
331                 case '}':
332                         if(rep->ad1.type != A_NONE)
333                                 quit(AD0MES, linebuf);
334                         if(--depth < 0)
335                                 quit("Too many }'s");
336                         *cmpend[depth] = rep;
337                         if(*cp == 0)
338                                 continue;
339                         goto comploop;
340
341                 case '=':
342                         rep->command = EQCOM;
343                         if(rep->ad2.type != A_NONE)
344                                 quit(AD1MES, linebuf);
345                         break;
346
347                 case ':':
348                         if(rep->ad1.type != A_NONE)
349                                 quit(AD0MES, linebuf);
350
351                         while(*cp == L' ')
352                                 cp++;
353                         tp = lab->uninm;
354                         while (*cp && *cp != L';' && *cp != L' ' &&
355                             *cp != L'\t' && *cp != L'#') {
356                                 *tp++ = *cp++;
357                                 if(tp >= &lab->uninm[8])
358                                         quit(LTL, linebuf);
359                         }
360                         *tp = L'\0';
361
362                         if (*lab->uninm == L'\0')               /* no label? */
363                                 quit(CGMES, L":", linebuf);
364                         if(lpt = search(lab)) {
365                                 if(lpt->address)
366                                         quit("Duplicate labels: %S", linebuf);
367                         } else {
368                                 lab->chain = 0;
369                                 lpt = lab;
370                                 if(++lab >= labend)
371                                         quit("Too many labels: %S", linebuf);
372                         }
373                         lpt->address = rep;
374                         if (*cp == L'#')
375                                 continue;
376                         rep--;                  /* reuse this slot */
377                         break;
378
379                 case 'a':
380                         rep->command = ACOM;
381                         if(rep->ad2.type != A_NONE)
382                                 quit(AD1MES, linebuf);
383                         if(*cp == L'\\')
384                                 cp++;
385                         if(*cp++ != L'\n')
386                                 quit(CGMES, L"a", linebuf);
387                         rep->text = p;
388                         p = stext(p, addend);
389                         break;
390                 case 'c':
391                         rep->command = CCOM;
392                         if(*cp == L'\\')
393                                 cp++;
394                         if(*cp++ != L'\n')
395                                 quit(CGMES, L"c", linebuf);
396                         rep->text = p;
397                         p = stext(p, addend);
398                         break;
399                 case 'i':
400                         rep->command = ICOM;
401                         if(rep->ad2.type != A_NONE)
402                                 quit(AD1MES, linebuf);
403                         if(*cp == L'\\')
404                                 cp++;
405                         if(*cp++ != L'\n')
406                                 quit(CGMES, L"i", linebuf);
407                         rep->text = p;
408                         p = stext(p, addend);
409                         break;
410
411                 case 'g':
412                         rep->command = GCOM;
413                         break;
414
415                 case 'G':
416                         rep->command = CGCOM;
417                         break;
418
419                 case 'h':
420                         rep->command = HCOM;
421                         break;
422
423                 case 'H':
424                         rep->command = CHCOM;
425                         break;
426
427                 case 't':
428                         rep->command = TCOM;
429                         goto jtcommon;
430
431                 case 'b':
432                         rep->command = BCOM;
433 jtcommon:
434                         while(*cp == L' ')
435                                 cp++;
436                         if(*cp == L'\0' || *cp == L';') {
437                                 /* no label; jump to end */
438                                 if(pt = ltab[0].chain) {
439                                         while((pt1 = pt->lb1) != nil)
440                                                 pt = pt1;
441                                         pt->lb1 = rep;
442                                 } else
443                                         ltab[0].chain = rep;
444                                 break;
445                         }
446
447                         /* copy label into lab->uninm */
448                         tp = lab->uninm;
449                         while((*tp = *cp++) != L'\0' && *tp != L';')
450                                 if(++tp >= &lab->uninm[8])
451                                         quit(LTL, linebuf);
452                         cp--;
453                         *tp = L'\0';
454
455                         if (*lab->uninm == L'\0')
456                                 /* shouldn't get here */
457                                 quit(CGMES, L"b or t", linebuf);
458                         if((lpt = search(lab)) != nil) {
459                                 if(lpt->address)
460                                         rep->lb1 = lpt->address;
461                                 else {
462                                         for(pt = lpt->chain; pt != nil &&
463                                             (pt1 = pt->lb1) != nil; pt = pt1)
464                                                 ;
465                                         if (pt)
466                                                 pt->lb1 = rep;
467                                 }
468                         } else {                        /* add new label */
469                                 lab->chain = rep;
470                                 lab->address = 0;
471                                 if(++lab >= labend)
472                                         quit("Too many labels: %S", linebuf);
473                         }
474                         break;
475
476                 case 'n':
477                         rep->command = NCOM;
478                         break;
479
480                 case 'N':
481                         rep->command = CNCOM;
482                         break;
483
484                 case 'p':
485                         rep->command = PCOM;
486                         break;
487
488                 case 'P':
489                         rep->command = CPCOM;
490                         break;
491
492                 case 'r':
493                         rep->command = RCOM;
494                         if(rep->ad2.type != A_NONE)
495                                 quit(AD1MES, linebuf);
496                         if(*cp++ != L' ')
497                                 quit(CGMES, L"r", linebuf);
498                         rep->text = p;
499                         p = stext(p, addend);
500                         break;
501
502                 case 'd':
503                         rep->command = DCOM;
504                         break;
505
506                 case 'D':
507                         rep->command = CDCOM;
508                         rep->lb1 = pspace;
509                         break;
510
511                 case 'q':
512                         rep->command = QCOM;
513                         if(rep->ad2.type != A_NONE)
514                                 quit(AD1MES, linebuf);
515                         break;
516
517                 case 'l':
518                         rep->command = LCOM;
519                         break;
520
521                 case 's':
522                         rep->command = SCOM;
523                         seof = *cp++;
524                         if ((rep->re1 = compile()) == 0) {
525                                 if(!lastre)
526                                         quit("First RE may not be null");
527                                 rep->re1 = lastre;
528                         }
529                         rep->rhs = p;
530                         if((p = compsub(p, addend)) == 0)
531                                 quit(CGMES, L"s", linebuf);
532                         if(*cp == L'g') {
533                                 cp++;
534                                 rep->gfl++;
535                         } else if(gflag)
536                                 rep->gfl++;
537
538                         if(*cp == L'p') {
539                                 cp++;
540                                 rep->pfl = 1;
541                         }
542
543                         if(*cp == L'P') {
544                                 cp++;
545                                 rep->pfl = 2;
546                         }
547
548                         if(*cp == L'w') {
549                                 cp++;
550                                 if(*cp++ !=  L' ')
551                                         quit(CGMES, L"s", linebuf);
552                                 text(fname[nfiles]);
553                                 for(i = nfiles - 1; i >= 0; i--)
554                                         if(cmp(fname[nfiles], fname[i]) == 0) {
555                                                 rep->fcode = fcode[i];
556                                                 goto done;
557                                         }
558                                 if(nfiles >= MAXFILES)
559                                         quit("Too many files in w commands 1");
560                                 rep->fcode = open_file(fname[nfiles]);
561                         }
562                         break;
563
564                 case 'w':
565                         rep->command = WCOM;
566                         if(*cp++ != L' ')
567                                 quit(CGMES, L"w", linebuf);
568                         text(fname[nfiles]);
569                         for(i = nfiles - 1; i >= 0; i--)
570                                 if(cmp(fname[nfiles], fname[i]) == 0) {
571                                         rep->fcode = fcode[i];
572                                         goto done;
573                                 }
574                         if(nfiles >= MAXFILES){
575                                 fprint(2, "sed: Too many files in w commands 2 \n");
576                                 fprint(2, "nfiles = %d; MAXF = %d\n",
577                                         nfiles, MAXFILES);
578                                 errexit();
579                         }
580                         rep->fcode = open_file(fname[nfiles]);
581                         break;
582
583                 case 'x':
584                         rep->command = XCOM;
585                         break;
586
587                 case 'y':
588                         rep->command = YCOM;
589                         seof = *cp++;
590                         if (ycomp(rep) == 0)
591                                 quit(CGMES, L"y", linebuf);
592                         break;
593
594                 }
595 done:
596                 if(++rep >= pend)
597                         quit("Too many commands, last: %S", linebuf);
598                 if(*cp++ != L'\0') {
599                         if(cp[-1] == L';')
600                                 goto comploop;
601                         quit(CGMES, cp - 1, linebuf);
602                 }
603         }
604 }
605
606 Biobuf *
607 open_file(char *name)
608 {
609         int fd;
610         Biobuf *bp;
611
612         if ((bp = malloc(sizeof(Biobuf))) == 0)
613                 quit("Out of memory");
614         if ((fd = open(name, OWRITE)) < 0 &&
615             (fd = create(name, OWRITE, 0666)) < 0)
616                 quit("Cannot create %s", name);
617         Binit(bp, fd, OWRITE);
618         Blethal(bp, nil);
619         Bseek(bp, 0, 2);
620         fcode[nfiles++] = bp;
621         return bp;
622 }
623
624 Rune *
625 compsub(Rune *rhs, Rune *end)
626 {
627         Rune r;
628
629         while ((r = *cp++) != '\0') {
630                 if(r == '\\') {
631                         if (rhs < end)
632                                 *rhs++ = Runemax;
633                         else
634                                 return 0;
635                         r = *cp++;
636                         if(r == 'n')
637                                 r = '\n';
638                 } else {
639                         if(r == seof) {
640                                 if (rhs < end)
641                                         *rhs++ = '\0';
642                                 else
643                                         return 0;
644                                 return rhs;
645                         }
646                 }
647                 if (rhs < end)
648                         *rhs++ = r;
649                 else
650                         return 0;
651         }
652         return 0;
653 }
654
655 Reprog *
656 compile(void)
657 {
658         Rune c;
659         char *ep;
660         char expbuf[512];
661
662         if((c = *cp++) == seof)         /* L'//' */
663                 return 0;
664         ep = expbuf;
665         do {
666                 if (c == L'\0' || c == L'\n')
667                         quit(TMMES, linebuf);
668                 if (c == L'\\') {
669                         if (ep >= expbuf+sizeof(expbuf))
670                                 quit(TMMES, linebuf);
671                         ep += runetochar(ep, &c);
672                         if ((c = *cp++) == L'n')
673                                 c = L'\n';
674                 }
675                 if (ep >= expbuf + sizeof(expbuf))
676                         quit(TMMES, linebuf);
677                 ep += runetochar(ep, &c);
678         } while ((c = *cp++) != seof);
679         *ep = 0;
680         return lastre = regcomp(expbuf);
681 }
682
683 void
684 regerror(char *s)
685 {
686         USED(s);
687         quit(CGMES, L"r.e.-using", linebuf);
688 }
689
690 int
691 flushout(Biobufhdr *bp, void *v, long n)
692 {
693         int i;
694         
695         for(i = 0; i < nfiles; i++)
696                 Bflush(fcode[i]);
697         return read(bp->fid, v, n);
698 }
699
700 void
701 newfile(enum PTYPE type, char *name)
702 {
703         if (type == P_ARG)
704                 prog.curr = name;
705         else {
706                 if ((prog.bp = Bopen(name, OREAD)) == 0)
707                         quit("Cannot open pattern-file: %s\n", name);
708                 Blethal(prog.bp, nil);
709                 if(uflag) Biofn(prog.bp, flushout);
710         }
711         prog.type = type;
712 }
713
714 int
715 rline(Rune *buf, Rune *end)
716 {
717         long c;
718         Rune r;
719
720         while ((c = getrune()) >= 0) {
721                 r = c;
722                 if (r == '\\') {
723                         if (buf <= end)
724                                 *buf++ = r;
725                         if ((c = getrune()) < 0)
726                                 break;
727                         r = c;
728                 } else if (r == '\n') {
729                         *buf = '\0';
730                         return 1;
731                 }
732                 if (buf <= end)
733                         *buf++ = r;
734         }
735         *buf = '\0';
736         return -1;
737 }
738
739 long
740 getrune(void)
741 {
742         long c;
743         Rune r;
744         char *p;
745
746         if (prog.type == P_ARG) {
747                 if ((p = prog.curr) != 0) {
748                         if (*p) {
749                                 prog.curr += chartorune(&r, p);
750                                 c = r;
751                         } else {
752                                 c = '\n';       /* fake an end-of-line */
753                                 prog.curr = 0;
754                         }
755                 } else
756                         c = -1;
757         } else if ((c = Bgetrune(prog.bp)) < 0)
758                 Bterm(prog.bp);
759         return c;
760 }
761
762 void
763 address(Addr *ap)
764 {
765         int c;
766         long lno;
767
768         if((c = *cp++) == '$'){
769                 ap->type = A_DOL;
770                 dollars++;
771         }else if(c == '/') {
772                 seof = c;
773                 if (ap->rp = compile())
774                         ap->type = A_RE;
775                 else
776                         ap->type = A_LAST;
777         }
778         else if (c >= '0' && c <= '9') {
779                 lno = c - '0';
780                 while ((c = *cp) >= '0' && c <= '9')
781                         lno = lno*10 + *cp++ - '0';
782                 if(!lno)
783                         quit("line number 0 is illegal",0);
784                 ap->type = A_LINE;
785                 ap->line = lno;
786         }
787         else {
788                 cp--;
789                 ap->type = A_NONE;
790         }
791 }
792
793 cmp(char *a, char *b)           /* compare characters */
794 {
795         while(*a == *b++)
796                 if (*a == '\0')
797                         return 0;
798                 else
799                         a++;
800         return 1;
801 }
802 rcmp(Rune *a, Rune *b)          /* compare runes */
803 {
804         while(*a == *b++)
805                 if (*a == '\0')
806                         return 0;
807                 else
808                         a++;
809         return 1;
810 }
811
812 char *
813 text(char *p)           /* extract character string */
814 {
815         Rune r;
816
817         while(*cp == ' ' || *cp == '\t')
818                 cp++;
819         while (*cp) {
820                 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
821                         break;
822                 if (r == '\n')
823                         while (*cp == ' ' || *cp == '\t')
824                                 cp++;
825                 p += runetochar(p, &r);
826         }
827         *p++ = '\0';
828         return p;
829 }
830
831 Rune *
832 stext(Rune *p, Rune *end)               /* extract rune string */
833 {
834         while(*cp == L' ' || *cp == L'\t')
835                 cp++;
836         while (*cp) {
837                 if (*cp == L'\\' && *++cp == L'\0')
838                         break;
839                 if (p >= end-1)
840                         quit(TMMES, linebuf);
841                 if ((*p++ = *cp++) == L'\n')
842                         while(*cp == L' ' || *cp == L'\t')
843                                 cp++;
844         }
845         *p++ = 0;
846         return p;
847 }
848
849
850 Label *
851 search(Label *ptr)
852 {
853         Label   *rp;
854
855         for (rp = ltab; rp < ptr; rp++)
856                 if(rcmp(rp->uninm, ptr->uninm) == 0)
857                         return(rp);
858         return(0);
859 }
860
861 void
862 dechain(void)
863 {
864         Label   *lptr;
865         SedCom  *rptr, *trptr;
866
867         for(lptr = ltab; lptr < lab; lptr++) {
868                 if(lptr->address == 0)
869                         quit("Undefined label: %S", lptr->uninm);
870                 if(lptr->chain) {
871                         rptr = lptr->chain;
872                         while((trptr = rptr->lb1) != nil) {
873                                 rptr->lb1 = lptr->address;
874                                 rptr = trptr;
875                         }
876                         rptr->lb1 = lptr->address;
877                 }
878         }
879 }
880
881 int
882 ycomp(SedCom *r)
883 {
884         int i;
885         Rune *rp, *sp, *tsp;
886         Rune c, highc;
887
888         highc = 0;
889         for(tsp = cp; *tsp != seof; tsp++) {
890                 if(*tsp == L'\\')
891                         tsp++;
892                 if(*tsp == L'\n' || *tsp == L'\0')
893                         return 0;
894                 if (*tsp > highc)
895                         highc = *tsp;
896         }
897         tsp++;
898         if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
899                 quit("Out of memory");
900         *rp++ = highc;                          /* save upper bound */
901         for (i = 0; i <= highc; i++)
902                 rp[i] = i;
903         sp = cp;
904         while((c = *sp++) != seof) {
905                 if(c == L'\\' && *sp == L'n') {
906                         sp++;
907                         c = L'\n';
908                 }
909                 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
910                         rp[c] = L'\n';
911                         tsp++;
912                 }
913                 if(rp[c] == seof || rp[c] == L'\0') {
914                         free(r->re1);
915                         r->re1 = nil;
916                         return 0;
917                 }
918         }
919         if(*tsp != seof) {
920                 free(r->re1);
921                 r->re1 = nil;
922                 return 0;
923         }
924         cp = tsp+1;
925         return 1;
926 }
927
928 void
929 execute(void)
930 {
931         SedCom  *ipc;
932
933         while (spend = gline(linebuf)){
934                 for(ipc = pspace; ipc->command; ) {
935                         if (!executable(ipc)) {
936                                 ipc++;
937                                 continue;
938                         }
939                         command(ipc);
940
941                         if(delflag)
942                                 break;
943                         if(jflag) {
944                                 jflag = 0;
945                                 if((ipc = ipc->lb1) == 0)
946                                         break;
947                         } else
948                                 ipc++;
949                 }
950                 if(!nflag && !delflag)
951                         putline(&fout, linebuf, spend - linebuf);
952                 if(aptr > abuf)
953                         arout();
954                 delflag = 0;
955         }
956 }
957
958 /* determine if a statement should be applied to an input line */
959 int
960 executable(SedCom *ipc)
961 {
962         if (ipc->active) {      /* Addr1 satisfied - accept until Addr2 */
963                 if (ipc->active == 1)           /* Second line */
964                         ipc->active = 2;
965                 switch(ipc->ad2.type) {
966                 case A_NONE:            /* No second addr; use first */
967                         ipc->active = 0;
968                         break;
969                 case A_DOL:             /* Accept everything */
970                         return !ipc->negfl;
971                 case A_LINE:            /* Line at end of range? */
972                         if (lnum <= ipc->ad2.line) {
973                                 if (ipc->ad2.line == lnum)
974                                         ipc->active = 0;
975                                 return !ipc->negfl;
976                         }
977                         ipc->active = 0;        /* out of range */
978                         return ipc->negfl;
979                 case A_RE:              /* Check for matching R.E. */
980                         if (match(ipc->ad2.rp, linebuf))
981                                 ipc->active = 0;
982                         return !ipc->negfl;
983                 default:
984                         quit("Internal error");
985                 }
986         }
987         switch (ipc->ad1.type) {        /* Check first address */
988         case A_NONE:                    /* Everything matches */
989                 return !ipc->negfl;
990         case A_DOL:                     /* Only last line */
991                 if (dolflag)
992                         return !ipc->negfl;
993                 break;
994         case A_LINE:                    /* Check line number */
995                 if (ipc->ad1.line == lnum) {
996                         ipc->active = 1;        /* In range */
997                         return !ipc->negfl;
998                 }
999                 break;
1000         case A_RE:                      /* Check R.E. */
1001                 if (match(ipc->ad1.rp, linebuf)) {
1002                         ipc->active = 1;        /* In range */
1003                         return !ipc->negfl;
1004                 }
1005                 break;
1006         default:
1007                 quit("Internal error");
1008         }
1009         return ipc->negfl;
1010 }
1011
1012 int
1013 match(Reprog *pattern, Rune *buf)
1014 {
1015         if (!pattern)
1016                 return 0;
1017         subexp[0].rsp = buf;
1018         subexp[0].ep = 0;
1019         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
1020                 loc1 = subexp[0].rsp;
1021                 loc2 = subexp[0].rep;
1022                 return 1;
1023         }
1024         loc1 = loc2 = 0;
1025         return 0;
1026 }
1027
1028 int
1029 substitute(SedCom *ipc)
1030 {
1031         int len;
1032
1033         if(!match(ipc->re1, linebuf))
1034                 return 0;
1035
1036         /*
1037          * we have at least one match.  some patterns, e.g. '$' or '^', can
1038          * produce 0-length matches, so during a global substitute we must
1039          * bump to the character after a 0-length match to keep from looping.
1040          */
1041         sflag = 1;
1042         if(ipc->gfl == 0)                       /* single substitution */
1043                 dosub(ipc->rhs);
1044         else
1045                 do{                             /* global substitution */
1046                         len = loc2 - loc1;      /* length of match */
1047                         dosub(ipc->rhs);        /* dosub moves loc2 */
1048                         if(*loc2 == 0)          /* end of string */
1049                                 break;
1050                         if(len == 0)            /* zero-length R.E. match */
1051                                 loc2++;         /* bump over 0-length match */
1052                         if(*loc2 == 0)          /* end of string */
1053                                 break;
1054                 } while(match(ipc->re1, loc2));
1055         return 1;
1056 }
1057
1058 void
1059 dosub(Rune *rhsbuf)
1060 {
1061         int c, n;
1062         Rune *lp, *sp, *rp;
1063
1064         lp = linebuf;
1065         sp = genbuf;
1066         rp = rhsbuf;
1067         while (lp < loc1)
1068                 *sp++ = *lp++;
1069         while(c = *rp++) {
1070                 if (c == '&') {
1071                         sp = place(sp, loc1, loc2);
1072                         continue;
1073                 }
1074                 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1075                         n = c-'0';
1076                         if (subexp[n].rsp && subexp[n].rep) {
1077                                 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1078                                 continue;
1079                         }
1080                         else {
1081                                 quit("Invalid back reference \\%d", n);
1082                         }
1083                 }
1084                 *sp++ = c;
1085                 if (sp >= &genbuf[LBSIZE])
1086                         quit("Output line too long");
1087         }
1088         lp = loc2;
1089         loc2 = sp - genbuf + linebuf;
1090         while (*sp++ = *lp++)
1091                 if (sp >= &genbuf[LBSIZE])
1092                         quit("Output line too long");
1093         lp = linebuf;
1094         sp = genbuf;
1095         while (*lp++ = *sp++)
1096                 ;
1097         spend = lp - 1;
1098 }
1099
1100 Rune *
1101 place(Rune *sp, Rune *l1, Rune *l2)
1102 {
1103         while (l1 < l2) {
1104                 *sp++ = *l1++;
1105                 if (sp >= &genbuf[LBSIZE])
1106                         quit("Output line too long");
1107         }
1108         return sp;
1109 }
1110
1111 char *
1112 trans(int c)
1113 {
1114         static char buf[] = "\\x0000";
1115         static char hex[] = "0123456789abcdef";
1116
1117         switch(c) {
1118         case '\b':
1119                 return "\\b";
1120         case '\n':
1121                 return "\\n";
1122         case '\r':
1123                 return "\\r";
1124         case '\t':
1125                 return "\\t";
1126         case '\\':
1127                 return "\\\\";
1128         }
1129         buf[2] = hex[(c>>12)&0xF];
1130         buf[3] = hex[(c>>8)&0xF];
1131         buf[4] = hex[(c>>4)&0xF];
1132         buf[5] = hex[c&0xF];
1133         return buf;
1134 }
1135
1136 void
1137 command(SedCom *ipc)
1138 {
1139         int i, c;
1140         char *ucp;
1141         Rune *execp, *p1, *p2, *rp;
1142
1143         switch(ipc->command) {
1144         case ACOM:
1145                 *aptr++ = ipc;
1146                 if(aptr >= abuf+MAXADDS)
1147                         quit("Too many appends after line %ld", lnum);
1148                 *aptr = 0;
1149                 break;
1150         case CCOM:
1151                 delflag = 1;
1152                 if(ipc->active == 1) {
1153                         for(rp = ipc->text; *rp; rp++)
1154                                 Bputrune(&fout, *rp);
1155                         Bputc(&fout, '\n');
1156                 }
1157                 break;
1158         case DCOM:
1159                 delflag++;
1160                 break;
1161         case CDCOM:
1162                 p1 = p2 = linebuf;
1163                 while(*p1 != '\n') {
1164                         if(*p1++ == 0) {
1165                                 delflag++;
1166                                 return;
1167                         }
1168                 }
1169                 p1++;
1170                 while(*p2++ = *p1++)
1171                         ;
1172                 spend = p2 - 1;
1173                 jflag++;
1174                 break;
1175         case EQCOM:
1176                 Bprint(&fout, "%ld\n", lnum);
1177                 break;
1178         case GCOM:
1179                 p1 = linebuf;
1180                 p2 = holdsp;
1181                 while(*p1++ = *p2++)
1182                         ;
1183                 spend = p1 - 1;
1184                 break;
1185         case CGCOM:
1186                 *spend++ = '\n';
1187                 p1 = spend;
1188                 p2 = holdsp;
1189                 while(*p1++ = *p2++)
1190                         if(p1 >= lbend)
1191                                 break;
1192                 spend = p1 - 1;
1193                 break;
1194         case HCOM:
1195                 p1 = holdsp;
1196                 p2 = linebuf;
1197                 while(*p1++ = *p2++);
1198                 hspend = p1 - 1;
1199                 break;
1200         case CHCOM:
1201                 *hspend++ = '\n';
1202                 p1 = hspend;
1203                 p2 = linebuf;
1204                 while(*p1++ = *p2++)
1205                         if(p1 >= hend)
1206                                 break;
1207                 hspend = p1 - 1;
1208                 break;
1209         case ICOM:
1210                 for(rp = ipc->text; *rp; rp++)
1211                         Bputrune(&fout, *rp);
1212                 Bputc(&fout, '\n');
1213                 break;
1214         case BCOM:
1215                 jflag = 1;
1216                 break;
1217         case LCOM:
1218                 c = 0;
1219                 for (i = 0, rp = linebuf; *rp; rp++) {
1220                         c = *rp;
1221                         if(c >= 0x20 && c < 0x7F && c != '\\') {
1222                                 Bputc(&fout, c);
1223                                 if(i++ > 71) {
1224                                         Bprint(&fout, "\\\n");
1225                                         i = 0;
1226                                 }
1227                         } else {
1228                                 for (ucp = trans(*rp); *ucp; ucp++){
1229                                         c = *ucp;
1230                                         Bputc(&fout, c);
1231                                         if(i++ > 71) {
1232                                                 Bprint(&fout, "\\\n");
1233                                                 i = 0;
1234                                         }
1235                                 }
1236                         }
1237                 }
1238                 if(c == ' ')
1239                         Bprint(&fout, "\\n");
1240                 Bputc(&fout, '\n');
1241                 break;
1242         case NCOM:
1243                 if(!nflag)
1244                         putline(&fout, linebuf, spend-linebuf);
1245
1246                 if(aptr > abuf)
1247                         arout();
1248                 if((execp = gline(linebuf)) == 0) {
1249                         delflag = 1;
1250                         break;
1251                 }
1252                 spend = execp;
1253                 break;
1254         case CNCOM:
1255                 if(aptr > abuf)
1256                         arout();
1257                 *spend++ = '\n';
1258                 if((execp = gline(spend)) == 0) {
1259                         delflag = 1;
1260                         break;
1261                 }
1262                 spend = execp;
1263                 break;
1264         case PCOM:
1265                 putline(&fout, linebuf, spend-linebuf);
1266                 break;
1267         case CPCOM:
1268 cpcom:
1269                 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1270                         Bputc(&fout, *rp);
1271                 Bputc(&fout, '\n');
1272                 break;
1273         case QCOM:
1274                 if(!nflag)
1275                         putline(&fout, linebuf, spend-linebuf);
1276                 if(aptr > abuf)
1277                         arout();
1278                 exits(nil);
1279         case RCOM:
1280                 *aptr++ = ipc;
1281                 if(aptr >= &abuf[MAXADDS])
1282                         quit("Too many reads after line %ld", lnum);
1283                 *aptr = 0;
1284                 break;
1285         case SCOM:
1286                 i = substitute(ipc);
1287                 if(i && ipc->pfl)
1288                         if(ipc->pfl == 1)
1289                                 putline(&fout, linebuf, spend-linebuf);
1290                         else
1291                                 goto cpcom;
1292                 if(i && ipc->fcode)
1293                         goto wcom;
1294                 break;
1295
1296         case TCOM:
1297                 if(sflag) {
1298                         sflag = 0;
1299                         jflag = 1;
1300                 }
1301                 break;
1302
1303         case WCOM:
1304 wcom:
1305                 putline(ipc->fcode,linebuf, spend - linebuf);
1306                 break;
1307         case XCOM:
1308                 p1 = linebuf;
1309                 p2 = genbuf;
1310                 while(*p2++ = *p1++)
1311                         ;
1312                 p1 = holdsp;
1313                 p2 = linebuf;
1314                 while(*p2++ = *p1++)
1315                         ;
1316                 spend = p2 - 1;
1317                 p1 = genbuf;
1318                 p2 = holdsp;
1319                 while(*p2++ = *p1++)
1320                         ;
1321                 hspend = p2 - 1;
1322                 break;
1323         case YCOM:
1324                 p1 = linebuf;
1325                 p2 = ipc->text;
1326                 for (i = *p2++; *p1; p1++)
1327                         if (*p1 <= i)
1328                                 *p1 = p2[*p1];
1329                 break;
1330         }
1331 }
1332
1333 void
1334 putline(Biobuf *bp, Rune *buf, int n)
1335 {
1336         while (n--)
1337                 Bputrune(bp, *buf++);
1338         Bputc(bp, '\n');
1339 }
1340
1341 void
1342 arout(void)
1343 {
1344         int     c;
1345         char    *s, *e;
1346         char    buf[128];
1347         Rune    *p1;
1348         Biobuf  *fi;
1349
1350         for (aptr = abuf; *aptr; aptr++) {
1351                 if((*aptr)->command == ACOM) {
1352                         for(p1 = (*aptr)->text; *p1; p1++ )
1353                                 Bputrune(&fout, *p1);
1354                         Bputc(&fout, '\n');
1355                 } else {
1356                         for(s = buf, e = buf+sizeof(buf)-UTFmax-1, p1 = (*aptr)->text; *p1 && s < e; p1++)
1357                                 s += runetochar(s, p1);
1358                         *s = '\0';
1359                         if((fi = Bopen(buf, OREAD)) == 0)
1360                                 continue;
1361                         Blethal(fi, nil);
1362                         if(uflag) Biofn(fi, flushout);
1363                         while((c = Bgetc(fi)) >= 0)
1364                                 Bputc(&fout, c);
1365                         Bterm(fi);
1366                 }
1367         }
1368         aptr = abuf;
1369         *aptr = 0;
1370 }
1371
1372 void
1373 errexit(void)
1374 {
1375         exits("error");
1376 }
1377
1378 void
1379 quit(char *fmt, ...)
1380 {
1381         char *p, *ep;
1382         char msg[256];
1383         va_list arg;
1384
1385         ep = msg + sizeof msg;
1386         p = seprint(msg, ep, "sed: ");
1387         va_start(arg, fmt);
1388         p = vseprint(p, ep, fmt, arg);
1389         va_end(arg);
1390         p = seprint(p, ep, "\n");
1391         write(2, msg, p - msg);
1392         errexit();
1393 }
1394
1395 Rune *
1396 gline(Rune *addr)
1397 {
1398         long c;
1399         Rune *p;
1400         static long peekc = 0;
1401
1402         if (f == 0 && opendata() < 0)
1403                 return 0;
1404         sflag = 0;
1405         lnum++;
1406 /*      Bflush(&fout);********* dumped 4/30/92 - bobf****/
1407         do {
1408                 p = addr;
1409                 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1410                         if (c == '\n') {
1411                                 if (dollars != 0 && (peekc = Bgetrune(f)) < 0 && fhead == nil)
1412                                         dolflag = 1;
1413                                 *p = '\0';
1414                                 return p;
1415                         }
1416                         if (c && p < lbend)
1417                                 *p++ = c;
1418                 }
1419                 /* return partial final line, adding implicit newline */
1420                 if(p != addr) {
1421                         *p = '\0';
1422                         peekc = -1;
1423                         if (fhead == nil)
1424                                 dolflag = 1;
1425                         return p;
1426                 }
1427                 peekc = 0;
1428                 Bterm(f);
1429         } while (opendata() > 0);               /* Switch to next stream */
1430         f = 0;
1431         return 0;
1432 }
1433
1434 /*
1435  * Data file input section - the intent is to transparently
1436  *      catenate all data input streams.
1437  */
1438 void
1439 enroll(char *filename)          /* Add a file to the input file cache */
1440 {
1441         FileCache *fp;
1442
1443         if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1444                 quit("Out of memory");
1445         if (ftail == nil)
1446                 fhead = fp;
1447         else
1448                 ftail->next = fp;
1449         ftail = fp;
1450         fp->next = nil;
1451         fp->name = filename;            /* 0 => stdin */
1452 }
1453
1454 int
1455 opendata(void)
1456 {
1457         if (fhead == nil)
1458                 return -1;
1459         if (fhead->name) {
1460                 if ((f = Bopen(fhead->name, OREAD)) == nil)
1461                         quit("Can't open %s", fhead->name);
1462         } else {
1463                 Binit(&stdin, 0, OREAD);
1464                 f = &stdin;
1465         }
1466         Blethal(f, nil);
1467         if(uflag) Biofn(f, flushout);
1468         fhead = fhead->next;
1469         return 1;
1470 }