]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/sed.c
cwfs: fix listen filedescriptor leaks
[plan9front.git] / sys / src / cmd / sed.c
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
/* Fixed capacities of the static tables and buffers used below. */
enum {
	DEPTH		= 20,		/* deepest nesting of { } groups */
	MAXCMDS		= 512,		/* slots in the compiled command table */
	ADDSIZE		= 10000,	/* runes of storage for appended text and file names */
	MAXADDS		= 20,		/* pending adds and reads that may be queued */
	LBSIZE		= 8192,		/* runes in one input line */
	LABSIZE		= 50,		/* entries in the label table */
	MAXSUB		= 10,		/* sub-expressions tracked per match */
	MAXFILES	= 120,		/* distinct output files */
};
19
20 /*
21  * An address is a line #, a R.E., "$", a reference to the last
22  * R.E., or nothing.
23  */
24 typedef struct {
25         enum {
26                 A_NONE,
27                 A_DOL,
28                 A_LINE,
29                 A_RE,
30                 A_LAST,
31         }type;
32         union {
33                 long    line;           /* Line # */
34                 Reprog  *rp;            /* Compiled R.E. */
35         };
36 } Addr;
37
38 typedef struct  SEDCOM {
39         Addr    ad1;                    /* optional start address */
40         Addr    ad2;                    /* optional end address */
41         union {
42                 Reprog  *re1;           /* compiled R.E. */
43                 Rune    *text;          /* added text or file name */
44                 struct  SEDCOM  *lb1;   /* destination command of branch */
45         };
46         Rune    *rhs;                   /* Right-hand side of substitution */
47         Biobuf* fcode;                  /* File ID for read and write */
48         char    command;                /* command code -see below */
49         char    gfl;                    /* 'Global' flag for substitutions */
50         char    pfl;                    /* 'print' flag for substitutions */
51         char    active;                 /* 1 => data between start and end */
52         char    negfl;                  /* negation flag */
53 } SedCom;
54
/*
 * Command codes stored in SedCom.command.  The script letter that
 * fcomp() maps to each code is noted; codes without a note are not
 * assigned by fcomp().
 */
enum {
	ACOM	= 01,		/* a */
	BCOM	= 020,		/* b, and the branch generated for { */
	CCOM	= 02,		/* c */
	CDCOM	= 025,		/* D */
	CNCOM	= 022,		/* N */
	COCOM	= 017,
	CPCOM	= 023,		/* P */
	DCOM	= 03,		/* d */
	ECOM	= 015,
	EQCOM	= 013,		/* = */
	FCOM	= 016,
	GCOM	= 027,		/* g */
	CGCOM	= 030,		/* G */
	HCOM	= 031,		/* h */
	CHCOM	= 032,		/* H */
	ICOM	= 04,		/* i */
	LCOM	= 05,		/* l */
	NCOM	= 012,		/* n */
	PCOM	= 010,		/* p */
	QCOM	= 011,		/* q */
	RCOM	= 06,		/* r */
	SCOM	= 07,		/* s */
	TCOM	= 021,		/* t */
	WCOM	= 014,		/* w */
	CWCOM	= 024,
	YCOM	= 026,		/* y */
	XCOM	= 033,		/* x */
};
83
84 typedef struct label {                  /* Label symbol table */
85         Rune    uninm[9];               /* Label name */
86         SedCom  *chain;
87         SedCom  *address;               /* Command associated with label */
88 } Label;
89
/* Data file control block: one node of a singly linked list of file names. */
typedef struct	FILE_CACHE {
	struct FILE_CACHE *next;	/* following node in the list */
	char	*name;			/* name of the file */
} FileCache;
94
95 SedCom pspace[MAXCMDS];                 /* Command storage */
96 SedCom *pend = pspace+MAXCMDS;          /* End of command storage */
97 SedCom *rep = pspace;                   /* Current fill point */
98
99 int     dollars;                        /* Number of dollar (first) addresses */
100
101 Reprog  *lastre;                        /* Last regular expression */
102 Resub   subexp[MAXSUB];                 /* sub-patterns of pattern match*/
103
104 Rune    addspace[ADDSIZE];              /* Buffer for a, c, & i commands */
105 Rune    *addend = addspace+ADDSIZE;
106
107 SedCom  *abuf[MAXADDS];                 /* Queue of pending adds & reads */
108 SedCom  **aptr = abuf;
109
110 struct {                                /* Sed program input control block */
111         enum PTYPE {                    /* Either on command line or in file */
112                 P_ARG,
113                 P_FILE,
114         } type;
115         union PCTL {                    /* Pointer to data */
116                 Biobuf  *bp;
117                 char    *curr;
118         };
119 } prog;
120
121 Rune    genbuf[LBSIZE+1];               /* Miscellaneous buffer */
122
123 FileCache       *fhead;                 /* Head of File Cache Chain */
124 FileCache       *ftail;                 /* Tail of File Cache Chain */
125
126 Rune    *loc1;                          /* Start of pattern match */
127 Rune    *loc2;                          /* End of pattern match */
128 Rune    seof;                           /* Pattern delimiter char */
129
130 Rune    linebuf[LBSIZE+1];              /* Input data buffer */
131 Rune    *lbend = linebuf+LBSIZE;        /* End of buffer */
132 Rune    *spend = linebuf;               /* End of input data */
133 Rune    *cp;                            /* Current scan point in linebuf */
134
135 Rune    holdsp[LBSIZE+1];               /* Hold buffer */
136 Rune    *hend = holdsp+LBSIZE;          /* End of hold buffer */
137 Rune    *hspend = holdsp;               /* End of hold data */
138
139 int     nflag;                          /* Command line flags */
140 int     gflag;
141 int     uflag;
142
143 int     dolflag;                        /* Set when at true EOF */
144 int     sflag;                          /* Set when substitution done */
145 int     jflag;                          /* Set when jump required */
146 int     delflag;                        /* Delete current line when set */
147
148 long    lnum;                           /* Input line count */
149
150 char    fname[MAXFILES][40];            /* File name cache */
151 Biobuf  *fcode[MAXFILES];               /* File ID cache */
152 int     nfiles;                         /* Cache fill point */
153
154 Biobuf  fout;                           /* Output stream */
155 Biobuf  stdin;                          /* Default input */
156 Biobuf* f;                              /* Input data */
157
158 Label   ltab[LABSIZE];                  /* Label name symbol table */
159 Label   *labend = ltab+LABSIZE;         /* End of label table */
160 Label   *lab = ltab+1;                  /* Current Fill point */
161
162 int     depth;                          /* {} stack pointer */
163
164 Rune    bad;                            /* Dummy err ptr reference */
165 Rune    *badp = &bad;
166
167
/* Format strings for quit(); %S prints a Rune string. */
char	CGMES[]  =	"%S command garbled: %S";	/* command name, offending line */
char	TMMES[]  =	"Too much text: %S";		/* offending line */
char	LTL[]    =	"Label too long: %S";		/* offending line */
char	AD0MES[] =	"No addresses allowed: %S";	/* offending line */
char	AD1MES[] =	"Only one address allowed: %S";	/* offending line */
173
174 void    address(Addr *);
175 void    arout(void);
176 int     cmp(char *, char *);
177 int     rcmp(Rune *, Rune *);
178 void    command(SedCom *);
179 Reprog  *compile(void);
180 Rune    *compsub(Rune *, Rune *);
181 void    dechain(void);
182 void    dosub(Rune *);
183 void    enroll(char *);
184 void    errexit(void);
185 int     executable(SedCom *);
186 void    execute(void);
187 void    fcomp(void);
188 long    getrune(void);
189 Rune    *gline(Rune *);
190 int     match(Reprog *, Rune *);
191 void    newfile(enum PTYPE, char *);
192 int     opendata(void);
193 Biobuf  *open_file(char *);
194 Rune    *place(Rune *, Rune *, Rune *);
195 void    quit(char *, ...);
196 int     rline(Rune *, Rune *);
197 Label   *search(Label *);
198 int     substitute(SedCom *);
199 char    *text(char *);
200 Rune    *stext(Rune *, Rune *);
201 int     ycomp(SedCom *);
202 char *  trans(int c);
203 void    putline(Biobuf *bp, Rune *buf, int n);
204
205 void
206 main(int argc, char **argv)
207 {
208         int compfl;
209
210         lnum = 0;
211         Binit(&fout, 1, OWRITE);
212         Blethal(&fout, nil);
213         fcode[nfiles++] = &fout;
214         compfl = 0;
215
216         if(argc == 1)
217                 exits(nil);
218         ARGBEGIN{
219         case 'e':
220                 if (argc <= 1)
221                         quit("missing pattern");
222                 newfile(P_ARG, ARGF());
223                 fcomp();
224                 compfl = 1;
225                 continue;
226         case 'f':
227                 if(argc <= 1)
228                         quit("no pattern-file");
229                 newfile(P_FILE, ARGF());
230                 fcomp();
231                 compfl = 1;
232                 continue;
233         case 'g':
234                 gflag++;
235                 continue;
236         case 'n':
237                 nflag++;
238                 continue;
239         case 'u':
240                 uflag++;
241                 continue;
242         default:
243                 quit("Unknown flag: %c", ARGC());
244         } ARGEND
245
246         if(compfl == 0) {
247                 if (--argc < 0)
248                         quit("missing pattern");
249                 newfile(P_ARG, *argv++);
250                 fcomp();
251         }
252
253         if(depth)
254                 quit("Too many {'s");
255
256         ltab[0].address = rep;
257
258         dechain();
259
260         if(argc <= 0)
261                 enroll(nil);            /* Add stdin to cache */
262         else
263                 while(--argc >= 0)
264                         enroll(*argv++);
265         execute();
266         exits(nil);
267 }
268
269 void
270 fcomp(void)
271 {
272         int     i;
273         Label   *lpt;
274         Rune    *tp;
275         SedCom  *pt, *pt1;
276         static Rune     *p = addspace;
277         static SedCom   **cmpend[DEPTH];        /* stack of {} operations */
278
279         while (rline(linebuf, lbend) >= 0) {
280                 cp = linebuf;
281 comploop:
282                 while(*cp == L' ' || *cp == L'\t')
283                         cp++;
284                 if(*cp == L'\0' || *cp == L'#')
285                         continue;
286                 if(*cp == L';') {
287                         cp++;
288                         goto comploop;
289                 }
290
291                 address(&rep->ad1);
292                 if (rep->ad1.type != A_NONE) {
293                         if (rep->ad1.type == A_DOL)
294                                 dollars++;
295                         if (rep->ad1.type == A_LAST) {
296                                 if (!lastre)
297                                         quit("First RE may not be null");
298                                 rep->ad1.type = A_RE;
299                                 rep->ad1.rp = lastre;
300                         }
301                         if(*cp == L',' || *cp == L';') {
302                                 cp++;
303                                 address(&rep->ad2);
304                                 if (rep->ad2.type == A_LAST) {
305                                         rep->ad2.type = A_RE;
306                                         rep->ad2.rp = lastre;
307                                 }
308                         } else
309                                 rep->ad2.type = A_NONE;
310                 }
311                 while(*cp == L' ' || *cp == L'\t')
312                         cp++;
313
314 swit:
315                 switch(*cp++) {
316                 default:
317                         quit("Unrecognized command: %S", linebuf);
318
319                 case '!':
320                         rep->negfl = 1;
321                         goto swit;
322
323                 case '{':
324                         rep->command = BCOM;
325                         rep->negfl = !rep->negfl;
326                         cmpend[depth++] = &rep->lb1;
327                         if(++rep >= pend)
328                                 quit("Too many commands: %S", linebuf);
329                         if(*cp == '\0')
330                                 continue;
331                         goto comploop;
332
333                 case '}':
334                         if(rep->ad1.type != A_NONE)
335                                 quit(AD0MES, linebuf);
336                         if(--depth < 0)
337                                 quit("Too many }'s");
338                         *cmpend[depth] = rep;
339                         if(*cp == 0)
340                                 continue;
341                         goto comploop;
342
343                 case '=':
344                         rep->command = EQCOM;
345                         if(rep->ad2.type != A_NONE)
346                                 quit(AD1MES, linebuf);
347                         break;
348
349                 case ':':
350                         if(rep->ad1.type != A_NONE)
351                                 quit(AD0MES, linebuf);
352
353                         while(*cp == L' ')
354                                 cp++;
355                         tp = lab->uninm;
356                         while (*cp && *cp != L';' && *cp != L' ' &&
357                             *cp != L'\t' && *cp != L'#') {
358                                 *tp++ = *cp++;
359                                 if(tp >= &lab->uninm[8])
360                                         quit(LTL, linebuf);
361                         }
362                         *tp = L'\0';
363
364                         if (*lab->uninm == L'\0')               /* no label? */
365                                 quit(CGMES, L":", linebuf);
366                         if(lpt = search(lab)) {
367                                 if(lpt->address)
368                                         quit("Duplicate labels: %S", linebuf);
369                         } else {
370                                 lab->chain = 0;
371                                 lpt = lab;
372                                 if(++lab >= labend)
373                                         quit("Too many labels: %S", linebuf);
374                         }
375                         lpt->address = rep;
376                         if (*cp == L'#')
377                                 continue;
378                         rep--;                  /* reuse this slot */
379                         break;
380
381                 case 'a':
382                         rep->command = ACOM;
383                         if(rep->ad2.type != A_NONE)
384                                 quit(AD1MES, linebuf);
385                         if(*cp == L'\\')
386                                 cp++;
387                         if(*cp++ != L'\n')
388                                 quit(CGMES, L"a", linebuf);
389                         rep->text = p;
390                         p = stext(p, addend);
391                         break;
392                 case 'c':
393                         rep->command = CCOM;
394                         if(*cp == L'\\')
395                                 cp++;
396                         if(*cp++ != L'\n')
397                                 quit(CGMES, L"c", linebuf);
398                         rep->text = p;
399                         p = stext(p, addend);
400                         break;
401                 case 'i':
402                         rep->command = ICOM;
403                         if(rep->ad2.type != A_NONE)
404                                 quit(AD1MES, linebuf);
405                         if(*cp == L'\\')
406                                 cp++;
407                         if(*cp++ != L'\n')
408                                 quit(CGMES, L"i", linebuf);
409                         rep->text = p;
410                         p = stext(p, addend);
411                         break;
412
413                 case 'g':
414                         rep->command = GCOM;
415                         break;
416
417                 case 'G':
418                         rep->command = CGCOM;
419                         break;
420
421                 case 'h':
422                         rep->command = HCOM;
423                         break;
424
425                 case 'H':
426                         rep->command = CHCOM;
427                         break;
428
429                 case 't':
430                         rep->command = TCOM;
431                         goto jtcommon;
432
433                 case 'b':
434                         rep->command = BCOM;
435 jtcommon:
436                         while(*cp == L' ')
437                                 cp++;
438                         if(*cp == L'\0' || *cp == L';') {
439                                 /* no label; jump to end */
440                                 if(pt = ltab[0].chain) {
441                                         while((pt1 = pt->lb1) != nil)
442                                                 pt = pt1;
443                                         pt->lb1 = rep;
444                                 } else
445                                         ltab[0].chain = rep;
446                                 break;
447                         }
448
449                         /* copy label into lab->uninm */
450                         tp = lab->uninm;
451                         while((*tp = *cp++) != L'\0' && *tp != L';')
452                                 if(++tp >= &lab->uninm[8])
453                                         quit(LTL, linebuf);
454                         cp--;
455                         *tp = L'\0';
456
457                         if (*lab->uninm == L'\0')
458                                 /* shouldn't get here */
459                                 quit(CGMES, L"b or t", linebuf);
460                         if((lpt = search(lab)) != nil) {
461                                 if(lpt->address)
462                                         rep->lb1 = lpt->address;
463                                 else {
464                                         for(pt = lpt->chain; pt != nil &&
465                                             (pt1 = pt->lb1) != nil; pt = pt1)
466                                                 ;
467                                         if (pt)
468                                                 pt->lb1 = rep;
469                                 }
470                         } else {                        /* add new label */
471                                 lab->chain = rep;
472                                 lab->address = 0;
473                                 if(++lab >= labend)
474                                         quit("Too many labels: %S", linebuf);
475                         }
476                         break;
477
478                 case 'n':
479                         rep->command = NCOM;
480                         break;
481
482                 case 'N':
483                         rep->command = CNCOM;
484                         break;
485
486                 case 'p':
487                         rep->command = PCOM;
488                         break;
489
490                 case 'P':
491                         rep->command = CPCOM;
492                         break;
493
494                 case 'r':
495                         rep->command = RCOM;
496                         if(rep->ad2.type != A_NONE)
497                                 quit(AD1MES, linebuf);
498                         if(*cp++ != L' ')
499                                 quit(CGMES, L"r", linebuf);
500                         rep->text = p;
501                         p = stext(p, addend);
502                         break;
503
504                 case 'd':
505                         rep->command = DCOM;
506                         break;
507
508                 case 'D':
509                         rep->command = CDCOM;
510                         rep->lb1 = pspace;
511                         break;
512
513                 case 'q':
514                         rep->command = QCOM;
515                         if(rep->ad2.type != A_NONE)
516                                 quit(AD1MES, linebuf);
517                         break;
518
519                 case 'l':
520                         rep->command = LCOM;
521                         break;
522
523                 case 's':
524                         rep->command = SCOM;
525                         seof = *cp++;
526                         if ((rep->re1 = compile()) == 0) {
527                                 if(!lastre)
528                                         quit("First RE may not be null");
529                                 rep->re1 = lastre;
530                         }
531                         rep->rhs = p;
532                         if((p = compsub(p, addend)) == 0)
533                                 quit(CGMES, L"s", linebuf);
534                         if(*cp == L'g') {
535                                 cp++;
536                                 rep->gfl++;
537                         } else if(gflag)
538                                 rep->gfl++;
539
540                         if(*cp == L'p') {
541                                 cp++;
542                                 rep->pfl = 1;
543                         }
544
545                         if(*cp == L'P') {
546                                 cp++;
547                                 rep->pfl = 2;
548                         }
549
550                         if(*cp == L'w') {
551                                 cp++;
552                                 if(*cp++ !=  L' ')
553                                         quit(CGMES, L"s", linebuf);
554                                 text(fname[nfiles]);
555                                 for(i = nfiles - 1; i >= 0; i--)
556                                         if(cmp(fname[nfiles], fname[i]) == 0) {
557                                                 rep->fcode = fcode[i];
558                                                 goto done;
559                                         }
560                                 if(nfiles >= MAXFILES)
561                                         quit("Too many files in w commands 1");
562                                 rep->fcode = open_file(fname[nfiles]);
563                         }
564                         break;
565
566                 case 'w':
567                         rep->command = WCOM;
568                         if(*cp++ != L' ')
569                                 quit(CGMES, L"w", linebuf);
570                         text(fname[nfiles]);
571                         for(i = nfiles - 1; i >= 0; i--)
572                                 if(cmp(fname[nfiles], fname[i]) == 0) {
573                                         rep->fcode = fcode[i];
574                                         goto done;
575                                 }
576                         if(nfiles >= MAXFILES){
577                                 fprint(2, "sed: Too many files in w commands 2 \n");
578                                 fprint(2, "nfiles = %d; MAXF = %d\n",
579                                         nfiles, MAXFILES);
580                                 errexit();
581                         }
582                         rep->fcode = open_file(fname[nfiles]);
583                         break;
584
585                 case 'x':
586                         rep->command = XCOM;
587                         break;
588
589                 case 'y':
590                         rep->command = YCOM;
591                         seof = *cp++;
592                         if (ycomp(rep) == 0)
593                                 quit(CGMES, L"y", linebuf);
594                         break;
595
596                 }
597 done:
598                 if(++rep >= pend)
599                         quit("Too many commands, last: %S", linebuf);
600                 if(*cp++ != L'\0') {
601                         if(cp[-1] == L';')
602                                 goto comploop;
603                         quit(CGMES, cp - 1, linebuf);
604                 }
605         }
606 }
607
608 Biobuf *
609 open_file(char *name)
610 {
611         int fd;
612         Biobuf *bp;
613
614         if ((bp = malloc(sizeof(Biobuf))) == 0)
615                 quit("Out of memory");
616         if ((fd = open(name, OWRITE)) < 0 &&
617             (fd = create(name, OWRITE, 0666)) < 0)
618                 quit("Cannot create %s", name);
619         Binit(bp, fd, OWRITE);
620         Blethal(bp, nil);
621         Bseek(bp, 0, 2);
622         fcode[nfiles++] = bp;
623         return bp;
624 }
625
626 Rune *
627 compsub(Rune *rhs, Rune *end)
628 {
629         Rune r;
630
631         while ((r = *cp++) != '\0') {
632                 if(r == '\\') {
633                         if (rhs < end)
634                                 *rhs++ = Runemax;
635                         else
636                                 return 0;
637                         r = *cp++;
638                         if(r == 'n')
639                                 r = '\n';
640                 } else {
641                         if(r == seof) {
642                                 if (rhs < end)
643                                         *rhs++ = '\0';
644                                 else
645                                         return 0;
646                                 return rhs;
647                         }
648                 }
649                 if (rhs < end)
650                         *rhs++ = r;
651                 else
652                         return 0;
653         }
654         return 0;
655 }
656
657 Reprog *
658 compile(void)
659 {
660         Rune c;
661         char *ep;
662         char expbuf[512];
663
664         if((c = *cp++) == seof)         /* L'//' */
665                 return 0;
666         ep = expbuf;
667         do {
668                 if (c == L'\0' || c == L'\n')
669                         quit(TMMES, linebuf);
670                 if (c == L'\\') {
671                         if (ep >= expbuf+sizeof(expbuf))
672                                 quit(TMMES, linebuf);
673                         ep += runetochar(ep, &c);
674                         if ((c = *cp++) == L'n')
675                                 c = L'\n';
676                 }
677                 if (ep >= expbuf + sizeof(expbuf))
678                         quit(TMMES, linebuf);
679                 ep += runetochar(ep, &c);
680         } while ((c = *cp++) != seof);
681         *ep = 0;
682         return lastre = regcomp(expbuf);
683 }
684
685 void
686 regerror(char *s)
687 {
688         USED(s);
689         quit(CGMES, L"r.e.-using", linebuf);
690 }
691
692 int
693 flushout(Biobufhdr *bp, void *v, long n)
694 {
695         int i;
696         
697         for(i = 0; i < nfiles; i++)
698                 Bflush(fcode[i]);
699         return read(bp->fid, v, n);
700 }
701
702 void
703 newfile(enum PTYPE type, char *name)
704 {
705         if (type == P_ARG)
706                 prog.curr = name;
707         else {
708                 if ((prog.bp = Bopen(name, OREAD)) == 0)
709                         quit("Cannot open pattern-file: %s\n", name);
710                 Blethal(prog.bp, nil);
711                 if(uflag) Biofn(prog.bp, flushout);
712         }
713         prog.type = type;
714 }
715
716 int
717 rline(Rune *buf, Rune *end)
718 {
719         long c;
720         Rune r;
721
722         while ((c = getrune()) >= 0) {
723                 r = c;
724                 if (r == '\\') {
725                         if (buf <= end)
726                                 *buf++ = r;
727                         if ((c = getrune()) < 0)
728                                 break;
729                         r = c;
730                 } else if (r == '\n') {
731                         *buf = '\0';
732                         return 1;
733                 }
734                 if (buf <= end)
735                         *buf++ = r;
736         }
737         *buf = '\0';
738         return -1;
739 }
740
741 long
742 getrune(void)
743 {
744         long c;
745         Rune r;
746         char *p;
747
748         if (prog.type == P_ARG) {
749                 if ((p = prog.curr) != 0) {
750                         if (*p) {
751                                 prog.curr += chartorune(&r, p);
752                                 c = r;
753                         } else {
754                                 c = '\n';       /* fake an end-of-line */
755                                 prog.curr = 0;
756                         }
757                 } else
758                         c = -1;
759         } else if ((c = Bgetrune(prog.bp)) < 0)
760                 Bterm(prog.bp);
761         return c;
762 }
763
764 void
765 address(Addr *ap)
766 {
767         int c;
768         long lno;
769
770         if((c = *cp++) == '$')
771                 ap->type = A_DOL;
772         else if(c == '/') {
773                 seof = c;
774                 if (ap->rp = compile())
775                         ap->type = A_RE;
776                 else
777                         ap->type = A_LAST;
778         }
779         else if (c >= '0' && c <= '9') {
780                 lno = c - '0';
781                 while ((c = *cp) >= '0' && c <= '9')
782                         lno = lno*10 + *cp++ - '0';
783                 if(!lno)
784                         quit("line number 0 is illegal",0);
785                 ap->type = A_LINE;
786                 ap->line = lno;
787         }
788         else {
789                 cp--;
790                 ap->type = A_NONE;
791         }
792 }
793
/*
 * Compare two NUL-terminated byte strings.
 * Returns 0 when they are equal and 1 otherwise.
 * The return type is spelled out to agree with the prototype.
 */
int
cmp(char *a, char *b)
{
	while(*a == *b++) {
		if (*a == '\0')
			return 0;
		a++;
	}
	return 1;
}
803 rcmp(Rune *a, Rune *b)          /* compare runes */
804 {
805         while(*a == *b++)
806                 if (*a == '\0')
807                         return 0;
808                 else
809                         a++;
810         return 1;
811 }
812
813 char *
814 text(char *p)           /* extract character string */
815 {
816         Rune r;
817
818         while(*cp == ' ' || *cp == '\t')
819                 cp++;
820         while (*cp) {
821                 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
822                         break;
823                 if (r == '\n')
824                         while (*cp == ' ' || *cp == '\t')
825                                 cp++;
826                 p += runetochar(p, &r);
827         }
828         *p++ = '\0';
829         return p;
830 }
831
832 Rune *
833 stext(Rune *p, Rune *end)               /* extract rune string */
834 {
835         while(*cp == L' ' || *cp == L'\t')
836                 cp++;
837         while (*cp) {
838                 if (*cp == L'\\' && *++cp == L'\0')
839                         break;
840                 if (p >= end-1)
841                         quit(TMMES, linebuf);
842                 if ((*p++ = *cp++) == L'\n')
843                         while(*cp == L' ' || *cp == L'\t')
844                                 cp++;
845         }
846         *p++ = 0;
847         return p;
848 }
849
850
851 Label *
852 search(Label *ptr)
853 {
854         Label   *rp;
855
856         for (rp = ltab; rp < ptr; rp++)
857                 if(rcmp(rp->uninm, ptr->uninm) == 0)
858                         return(rp);
859         return(0);
860 }
861
862 void
863 dechain(void)
864 {
865         Label   *lptr;
866         SedCom  *rptr, *trptr;
867
868         for(lptr = ltab; lptr < lab; lptr++) {
869                 if(lptr->address == 0)
870                         quit("Undefined label: %S", lptr->uninm);
871                 if(lptr->chain) {
872                         rptr = lptr->chain;
873                         while((trptr = rptr->lb1) != nil) {
874                                 rptr->lb1 = lptr->address;
875                                 rptr = trptr;
876                         }
877                         rptr->lb1 = lptr->address;
878                 }
879         }
880 }
881
882 int
883 ycomp(SedCom *r)
884 {
885         int i;
886         Rune *rp, *sp, *tsp;
887         Rune c, highc;
888
889         highc = 0;
890         for(tsp = cp; *tsp != seof; tsp++) {
891                 if(*tsp == L'\\')
892                         tsp++;
893                 if(*tsp == L'\n' || *tsp == L'\0')
894                         return 0;
895                 if (*tsp > highc)
896                         highc = *tsp;
897         }
898         tsp++;
899         if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
900                 quit("Out of memory");
901         *rp++ = highc;                          /* save upper bound */
902         for (i = 0; i <= highc; i++)
903                 rp[i] = i;
904         sp = cp;
905         while((c = *sp++) != seof) {
906                 if(c == L'\\' && *sp == L'n') {
907                         sp++;
908                         c = L'\n';
909                 }
910                 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
911                         rp[c] = L'\n';
912                         tsp++;
913                 }
914                 if(rp[c] == seof || rp[c] == L'\0') {
915                         free(r->re1);
916                         r->re1 = nil;
917                         return 0;
918                 }
919         }
920         if(*tsp != seof) {
921                 free(r->re1);
922                 r->re1 = nil;
923                 return 0;
924         }
925         cp = tsp+1;
926         return 1;
927 }
928
929 void
930 execute(void)
931 {
932         SedCom  *ipc;
933
934         while (spend = gline(linebuf)){
935                 for(ipc = pspace; ipc->command; ) {
936                         if (!executable(ipc)) {
937                                 ipc++;
938                                 continue;
939                         }
940                         command(ipc);
941
942                         if(delflag)
943                                 break;
944                         if(jflag) {
945                                 jflag = 0;
946                                 if((ipc = ipc->lb1) == 0)
947                                         break;
948                         } else
949                                 ipc++;
950                 }
951                 if(!nflag && !delflag)
952                         putline(&fout, linebuf, spend - linebuf);
953                 if(aptr > abuf)
954                         arout();
955                 delflag = 0;
956         }
957 }
958
959 /* determine if a statement should be applied to an input line */
960 int
961 executable(SedCom *ipc)
962 {
963         if (ipc->active) {      /* Addr1 satisfied - accept until Addr2 */
964                 if (ipc->active == 1)           /* Second line */
965                         ipc->active = 2;
966                 switch(ipc->ad2.type) {
967                 case A_NONE:            /* No second addr; use first */
968                         ipc->active = 0;
969                         break;
970                 case A_DOL:             /* Accept everything */
971                         return !ipc->negfl;
972                 case A_LINE:            /* Line at end of range? */
973                         if (lnum <= ipc->ad2.line) {
974                                 if (ipc->ad2.line == lnum)
975                                         ipc->active = 0;
976                                 return !ipc->negfl;
977                         }
978                         ipc->active = 0;        /* out of range */
979                         return ipc->negfl;
980                 case A_RE:              /* Check for matching R.E. */
981                         if (match(ipc->ad2.rp, linebuf))
982                                 ipc->active = 0;
983                         return !ipc->negfl;
984                 default:
985                         quit("Internal error");
986                 }
987         }
988         switch (ipc->ad1.type) {        /* Check first address */
989         case A_NONE:                    /* Everything matches */
990                 return !ipc->negfl;
991         case A_DOL:                     /* Only last line */
992                 if (dolflag)
993                         return !ipc->negfl;
994                 break;
995         case A_LINE:                    /* Check line number */
996                 if (ipc->ad1.line == lnum) {
997                         ipc->active = 1;        /* In range */
998                         return !ipc->negfl;
999                 }
1000                 break;
1001         case A_RE:                      /* Check R.E. */
1002                 if (match(ipc->ad1.rp, linebuf)) {
1003                         ipc->active = 1;        /* In range */
1004                         return !ipc->negfl;
1005                 }
1006                 break;
1007         default:
1008                 quit("Internal error");
1009         }
1010         return ipc->negfl;
1011 }
1012
1013 int
1014 match(Reprog *pattern, Rune *buf)
1015 {
1016         if (!pattern)
1017                 return 0;
1018         subexp[0].rsp = buf;
1019         subexp[0].ep = 0;
1020         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
1021                 loc1 = subexp[0].rsp;
1022                 loc2 = subexp[0].rep;
1023                 return 1;
1024         }
1025         loc1 = loc2 = 0;
1026         return 0;
1027 }
1028
1029 int
1030 substitute(SedCom *ipc)
1031 {
1032         int len;
1033
1034         if(!match(ipc->re1, linebuf))
1035                 return 0;
1036
1037         /*
1038          * we have at least one match.  some patterns, e.g. '$' or '^', can
1039          * produce 0-length matches, so during a global substitute we must
1040          * bump to the character after a 0-length match to keep from looping.
1041          */
1042         sflag = 1;
1043         if(ipc->gfl == 0)                       /* single substitution */
1044                 dosub(ipc->rhs);
1045         else
1046                 do{                             /* global substitution */
1047                         len = loc2 - loc1;      /* length of match */
1048                         dosub(ipc->rhs);        /* dosub moves loc2 */
1049                         if(*loc2 == 0)          /* end of string */
1050                                 break;
1051                         if(len == 0)            /* zero-length R.E. match */
1052                                 loc2++;         /* bump over 0-length match */
1053                         if(*loc2 == 0)          /* end of string */
1054                                 break;
1055                 } while(match(ipc->re1, loc2));
1056         return 1;
1057 }
1058
1059 void
1060 dosub(Rune *rhsbuf)
1061 {
1062         int c, n;
1063         Rune *lp, *sp, *rp;
1064
1065         lp = linebuf;
1066         sp = genbuf;
1067         rp = rhsbuf;
1068         while (lp < loc1)
1069                 *sp++ = *lp++;
1070         while(c = *rp++) {
1071                 if (c == '&') {
1072                         sp = place(sp, loc1, loc2);
1073                         continue;
1074                 }
1075                 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1076                         n = c-'0';
1077                         if (subexp[n].rsp && subexp[n].rep) {
1078                                 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1079                                 continue;
1080                         }
1081                         else {
1082                                 quit("Invalid back reference \\%d", n);
1083                         }
1084                 }
1085                 *sp++ = c;
1086                 if (sp >= &genbuf[LBSIZE])
1087                         quit("Output line too long");
1088         }
1089         lp = loc2;
1090         loc2 = sp - genbuf + linebuf;
1091         while (*sp++ = *lp++)
1092                 if (sp >= &genbuf[LBSIZE])
1093                         quit("Output line too long");
1094         lp = linebuf;
1095         sp = genbuf;
1096         while (*lp++ = *sp++)
1097                 ;
1098         spend = lp - 1;
1099 }
1100
1101 Rune *
1102 place(Rune *sp, Rune *l1, Rune *l2)
1103 {
1104         while (l1 < l2) {
1105                 *sp++ = *l1++;
1106                 if (sp >= &genbuf[LBSIZE])
1107                         quit("Output line too long");
1108         }
1109         return sp;
1110 }
1111
/*
 * Return a printable escape sequence for the character code c.
 * Backspace, newline, carriage return, tab and backslash map to their
 * usual two-character escapes.  Anything else is rendered as \x
 * followed by lower-case hex digits: four digits for values up to
 * 0xFFFF and six for larger values, so that no bits of the code are
 * silently dropped.
 *
 * The hex form is built in a static buffer that is overwritten by the
 * next call.
 */
char *
trans(int c)
{
	static char buf[] = "\\x000000";
	static char hex[] = "0123456789abcdef";
	char *p;
	int shift;

	switch(c) {
	case '\b':
		return "\\b";
	case '\n':
		return "\\n";
	case '\r':
		return "\\r";
	case '\t':
		return "\\t";
	case '\\':
		return "\\\\";
	}

	/* widen to six digits only when four cannot represent the value */
	shift = (c & ~0xFFFF) ? 20 : 12;
	p = buf + 2;			/* keep the leading "\x" */
	for(; shift >= 0; shift -= 4)
		*p++ = hex[(c>>shift)&0xF];
	*p = '\0';
	return buf;
}
1136
1137 void
1138 command(SedCom *ipc)
1139 {
1140         int i, c;
1141         char *ucp;
1142         Rune *execp, *p1, *p2, *rp;
1143
1144         switch(ipc->command) {
1145         case ACOM:
1146                 *aptr++ = ipc;
1147                 if(aptr >= abuf+MAXADDS)
1148                         quit("Too many appends after line %ld", lnum);
1149                 *aptr = 0;
1150                 break;
1151         case CCOM:
1152                 delflag = 1;
1153                 if(ipc->active == 1) {
1154                         for(rp = ipc->text; *rp; rp++)
1155                                 Bputrune(&fout, *rp);
1156                         Bputc(&fout, '\n');
1157                 }
1158                 break;
1159         case DCOM:
1160                 delflag++;
1161                 break;
1162         case CDCOM:
1163                 p1 = p2 = linebuf;
1164                 while(*p1 != '\n') {
1165                         if(*p1++ == 0) {
1166                                 delflag++;
1167                                 return;
1168                         }
1169                 }
1170                 p1++;
1171                 while(*p2++ = *p1++)
1172                         ;
1173                 spend = p2 - 1;
1174                 jflag++;
1175                 break;
1176         case EQCOM:
1177                 Bprint(&fout, "%ld\n", lnum);
1178                 break;
1179         case GCOM:
1180                 p1 = linebuf;
1181                 p2 = holdsp;
1182                 while(*p1++ = *p2++)
1183                         ;
1184                 spend = p1 - 1;
1185                 break;
1186         case CGCOM:
1187                 *spend++ = '\n';
1188                 p1 = spend;
1189                 p2 = holdsp;
1190                 while(*p1++ = *p2++)
1191                         if(p1 >= lbend)
1192                                 break;
1193                 spend = p1 - 1;
1194                 break;
1195         case HCOM:
1196                 p1 = holdsp;
1197                 p2 = linebuf;
1198                 while(*p1++ = *p2++);
1199                 hspend = p1 - 1;
1200                 break;
1201         case CHCOM:
1202                 *hspend++ = '\n';
1203                 p1 = hspend;
1204                 p2 = linebuf;
1205                 while(*p1++ = *p2++)
1206                         if(p1 >= hend)
1207                                 break;
1208                 hspend = p1 - 1;
1209                 break;
1210         case ICOM:
1211                 for(rp = ipc->text; *rp; rp++)
1212                         Bputrune(&fout, *rp);
1213                 Bputc(&fout, '\n');
1214                 break;
1215         case BCOM:
1216                 jflag = 1;
1217                 break;
1218         case LCOM:
1219                 c = 0;
1220                 for (i = 0, rp = linebuf; *rp; rp++) {
1221                         c = *rp;
1222                         if(c >= 0x20 && c < 0x7F && c != '\\') {
1223                                 Bputc(&fout, c);
1224                                 if(i++ > 71) {
1225                                         Bprint(&fout, "\\\n");
1226                                         i = 0;
1227                                 }
1228                         } else {
1229                                 for (ucp = trans(*rp); *ucp; ucp++){
1230                                         c = *ucp;
1231                                         Bputc(&fout, c);
1232                                         if(i++ > 71) {
1233                                                 Bprint(&fout, "\\\n");
1234                                                 i = 0;
1235                                         }
1236                                 }
1237                         }
1238                 }
1239                 if(c == ' ')
1240                         Bprint(&fout, "\\n");
1241                 Bputc(&fout, '\n');
1242                 break;
1243         case NCOM:
1244                 if(!nflag)
1245                         putline(&fout, linebuf, spend-linebuf);
1246
1247                 if(aptr > abuf)
1248                         arout();
1249                 if((execp = gline(linebuf)) == 0) {
1250                         delflag = 1;
1251                         break;
1252                 }
1253                 spend = execp;
1254                 break;
1255         case CNCOM:
1256                 if(aptr > abuf)
1257                         arout();
1258                 *spend++ = '\n';
1259                 if((execp = gline(spend)) == 0) {
1260                         delflag = 1;
1261                         break;
1262                 }
1263                 spend = execp;
1264                 break;
1265         case PCOM:
1266                 putline(&fout, linebuf, spend-linebuf);
1267                 break;
1268         case CPCOM:
1269 cpcom:
1270                 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1271                         Bputc(&fout, *rp);
1272                 Bputc(&fout, '\n');
1273                 break;
1274         case QCOM:
1275                 if(!nflag)
1276                         putline(&fout, linebuf, spend-linebuf);
1277                 if(aptr > abuf)
1278                         arout();
1279                 exits(nil);
1280         case RCOM:
1281                 *aptr++ = ipc;
1282                 if(aptr >= &abuf[MAXADDS])
1283                         quit("Too many reads after line %ld", lnum);
1284                 *aptr = 0;
1285                 break;
1286         case SCOM:
1287                 i = substitute(ipc);
1288                 if(i && ipc->pfl)
1289                         if(ipc->pfl == 1)
1290                                 putline(&fout, linebuf, spend-linebuf);
1291                         else
1292                                 goto cpcom;
1293                 if(i && ipc->fcode)
1294                         goto wcom;
1295                 break;
1296
1297         case TCOM:
1298                 if(sflag) {
1299                         sflag = 0;
1300                         jflag = 1;
1301                 }
1302                 break;
1303
1304         case WCOM:
1305 wcom:
1306                 putline(ipc->fcode,linebuf, spend - linebuf);
1307                 break;
1308         case XCOM:
1309                 p1 = linebuf;
1310                 p2 = genbuf;
1311                 while(*p2++ = *p1++)
1312                         ;
1313                 p1 = holdsp;
1314                 p2 = linebuf;
1315                 while(*p2++ = *p1++)
1316                         ;
1317                 spend = p2 - 1;
1318                 p1 = genbuf;
1319                 p2 = holdsp;
1320                 while(*p2++ = *p1++)
1321                         ;
1322                 hspend = p2 - 1;
1323                 break;
1324         case YCOM:
1325                 p1 = linebuf;
1326                 p2 = ipc->text;
1327                 for (i = *p2++; *p1; p1++)
1328                         if (*p1 <= i)
1329                                 *p1 = p2[*p1];
1330                 break;
1331         }
1332 }
1333
1334 void
1335 putline(Biobuf *bp, Rune *buf, int n)
1336 {
1337         while (n--)
1338                 Bputrune(bp, *buf++);
1339         Bputc(bp, '\n');
1340 }
1341
1342 void
1343 arout(void)
1344 {
1345         int     c;
1346         char    *s, *e;
1347         char    buf[128];
1348         Rune    *p1;
1349         Biobuf  *fi;
1350
1351         for (aptr = abuf; *aptr; aptr++) {
1352                 if((*aptr)->command == ACOM) {
1353                         for(p1 = (*aptr)->text; *p1; p1++ )
1354                                 Bputrune(&fout, *p1);
1355                         Bputc(&fout, '\n');
1356                 } else {
1357                         for(s = buf, e = buf+sizeof(buf)-UTFmax-1, p1 = (*aptr)->text; *p1 && s < e; p1++)
1358                                 s += runetochar(s, p1);
1359                         *s = '\0';
1360                         if((fi = Bopen(buf, OREAD)) == 0)
1361                                 continue;
1362                         Blethal(fi, nil);
1363                         if(uflag) Biofn(fi, flushout);
1364                         while((c = Bgetc(fi)) >= 0)
1365                                 Bputc(&fout, c);
1366                         Bterm(fi);
1367                 }
1368         }
1369         aptr = abuf;
1370         *aptr = 0;
1371 }
1372
/*
 * Terminate the program with exit status "error".
 */
void
errexit(void)
{
	exits("error");
}
1378
1379 void
1380 quit(char *fmt, ...)
1381 {
1382         char *p, *ep;
1383         char msg[256];
1384         va_list arg;
1385
1386         ep = msg + sizeof msg;
1387         p = seprint(msg, ep, "sed: ");
1388         va_start(arg, fmt);
1389         p = vseprint(p, ep, fmt, arg);
1390         va_end(arg);
1391         p = seprint(p, ep, "\n");
1392         write(2, msg, p - msg);
1393         errexit();
1394 }
1395
1396 Rune *
1397 gline(Rune *addr)
1398 {
1399         long c;
1400         Rune *p;
1401         static long peekc = 0;
1402
1403         if (f == 0 && opendata() < 0)
1404                 return 0;
1405         sflag = 0;
1406         lnum++;
1407 /*      Bflush(&fout);********* dumped 4/30/92 - bobf****/
1408         do {
1409                 p = addr;
1410                 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1411                         if (c == '\n') {
1412                                 if (dollars != 0 && (peekc = Bgetrune(f)) < 0 && fhead == nil)
1413                                         dolflag = 1;
1414                                 *p = '\0';
1415                                 return p;
1416                         }
1417                         if (c && p < lbend)
1418                                 *p++ = c;
1419                 }
1420                 /* return partial final line, adding implicit newline */
1421                 if(p != addr) {
1422                         *p = '\0';
1423                         peekc = -1;
1424                         if (fhead == nil)
1425                                 dolflag = 1;
1426                         return p;
1427                 }
1428                 peekc = 0;
1429                 Bterm(f);
1430         } while (opendata() > 0);               /* Switch to next stream */
1431         f = 0;
1432         return 0;
1433 }
1434
1435 /*
1436  * Data file input section - the intent is to transparently
1437  *      catenate all data input streams.
1438  */
1439 void
1440 enroll(char *filename)          /* Add a file to the input file cache */
1441 {
1442         FileCache *fp;
1443
1444         if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1445                 quit("Out of memory");
1446         if (ftail == nil)
1447                 fhead = fp;
1448         else
1449                 ftail->next = fp;
1450         ftail = fp;
1451         fp->next = nil;
1452         fp->name = filename;            /* 0 => stdin */
1453 }
1454
1455 int
1456 opendata(void)
1457 {
1458         if (fhead == nil)
1459                 return -1;
1460         if (fhead->name) {
1461                 if ((f = Bopen(fhead->name, OREAD)) == nil)
1462                         quit("Can't open %s", fhead->name);
1463         } else {
1464                 Binit(&stdin, 0, OREAD);
1465                 f = &stdin;
1466         }
1467         Blethal(f, nil);
1468         if(uflag) Biofn(f, flushout);
1469         fhead = fhead->next;
1470         return 1;
1471 }