]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/sed.c
9bootfat: rename open() to fileinit and make it static as its really a internal funct...
[plan9front.git] / sys / src / cmd / sed.c
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
/* Fixed capacities of the static tables and buffers declared in this file. */
9 enum {
10         DEPTH           = 20,           /* max nesting depth of {}; size of fcomp's cmpend stack */
11         MAXCMDS         = 512,          /* max sed commands; size of pspace */
12         ADDSIZE         = 10000,        /* size in Runes of addspace (a, c, i, r text and s right-hand sides) */
13         MAXADDS         = 20,           /* max pending adds and reads; size of abuf */
14         LBSIZE          = 8192,         /* input line size in Runes (linebuf, holdsp, genbuf) */
15         LABSIZE         = 50,           /* size of label table ltab; ltab[0] is used for label-less branches */
16         MAXSUB          = 10,           /* max number of sub reg exp; size of subexp */
17         MAXFILES        = 120,          /* max output files; entry 0 of the cache is fout */
18 };
19
20 /*
21  * An address is a line #, a R.E., "$", a reference to the last
22  * R.E., or nothing.
23  */
24 typedef struct {
25         enum {
26                 A_NONE,                 /* no address present */
27                 A_DOL,                  /* "$" */
28                 A_LINE,                 /* line number, stored in line */
29                 A_RE,                   /* regular expression, stored in rp */
30                 A_LAST,                 /* empty R.E. (compile() returned 0); fcomp rewrites it to A_RE using lastre */
31         }type;
32         union {
33                 long    line;           /* Line # (A_LINE) */
34                 Reprog  *rp;            /* Compiled R.E. (A_RE) */
35         };
36 } Addr;
37
/* One compiled sed command; fcomp fills consecutive entries of pspace. */
38 typedef struct  SEDCOM {
39         Addr    ad1;                    /* optional start address */
40         Addr    ad2;                    /* optional end address */
41         union {                         /* which member is live depends on command */
42                 Reprog  *re1;           /* compiled R.E. (s command) */
43                 Rune    *text;          /* added text or file name; ycomp stores its translation table here */
44                 struct  SEDCOM  *lb1;   /* destination command of branch; also links unresolved branches */
45         };
46         Rune    *rhs;                   /* Right-hand side of substitution */
47         Biobuf* fcode;                  /* File ID for read and write */
48         char    command;                /* command code -see below */
49         char    gfl;                    /* 'Global' flag for substitutions */
50         char    pfl;                    /* 'print' flag for substitutions: 1 for p, 2 for P */
51         char    active;                 /* 1 => data between start and end */
52         char    negfl;                  /* negation flag, set by ! (and inverted by {) */
53 } SedCom;
54
55 /* Command Codes for field SedCom.command */
/*
 * The letter noted beside a code is the script character for which
 * fcomp stores that code.  Codes without a letter are not assigned
 * in the part of fcomp visible here.
 */
56 #define ACOM    01              /* a */
57 #define BCOM    020             /* b, and { (with negfl inverted) */
58 #define CCOM    02              /* c */
59 #define CDCOM   025             /* D */
60 #define CNCOM   022             /* N */
61 #define COCOM   017
62 #define CPCOM   023             /* P */
63 #define DCOM    03              /* d */
64 #define ECOM    015
65 #define EQCOM   013             /* = */
66 #define FCOM    016
67 #define GCOM    027             /* g */
68 #define CGCOM   030             /* G */
69 #define HCOM    031             /* h */
70 #define CHCOM   032             /* H */
71 #define ICOM    04              /* i */
72 #define LCOM    05              /* l */
73 #define NCOM    012             /* n */
74 #define PCOM    010             /* p */
75 #define QCOM    011             /* q */
76 #define RCOM    06              /* r */
77 #define SCOM    07              /* s */
78 #define TCOM    021             /* t */
79 #define WCOM    014             /* w */
80 #define CWCOM   024
81 #define YCOM    026             /* y */
82 #define XCOM    033             /* x */
83
84 typedef struct label {                  /* Label symbol table */
85         Rune    uninm[9];               /* Label name; fcomp quits once a name reaches 8 runes */
86         SedCom  *chain;                 /* branches seen before the label was defined, linked through lb1 */
87         SedCom  *address;               /* Command associated with label; 0 until the label is defined */
88 } Label;
89
/* Singly linked list node; fhead and ftail below point into this list. */
90 typedef struct  FILE_CACHE {            /* Data file control block */
91         struct FILE_CACHE *next;        /* Forward Link */
92         char    *name;                  /* Name of file */
93 } FileCache;
94
95 SedCom pspace[MAXCMDS];                 /* Command storage */
96 SedCom *pend = pspace+MAXCMDS;          /* End of command storage (one past the last slot) */
97 SedCom *rep = pspace;                   /* Current fill point: the command fcomp is building */
98
99 Reprog  *lastre = 0;                    /* Last regular expression compiled by compile() */
100 Resub   subexp[MAXSUB];                 /* sub-patterns of pattern match*/
101
102 Rune    addspace[ADDSIZE];              /* Text for a, c, i, r commands and s right-hand sides */
103 Rune    *addend = addspace+ADDSIZE;     /* One past the end of addspace */
104
105 SedCom  *abuf[MAXADDS];                 /* Queue of pending adds & reads */
106 SedCom  **aptr = abuf;
107
/* Set by newfile(); read by getrune() to fetch the next script rune. */
108 struct {                                /* Sed program input control block */
109         enum PTYPE {                    /* Either on command line or in file */
110                 P_ARG,                  /* script text is the string at curr */
111                 P_FILE,                 /* script is read from bp */
112         } type;
113         union PCTL {                    /* Pointer to data */
114                 Biobuf  *bp;            /* P_FILE: open script file */
115                 char    *curr;          /* P_ARG: next unread byte, 0 once exhausted */
116         };
117 } prog;
118
119 Rune    genbuf[LBSIZE];                 /* Miscellaneous buffer */
120
121 FileCache       *fhead = 0;             /* Head of File Cache Chain */
122 FileCache       *ftail = 0;             /* Tail of File Cache Chain */
123
124 Rune    *loc1;                          /* Start of pattern match */
125 Rune    *loc2;                          /* End of pattern match */
126 Rune    seof;                           /* Pattern delimiter char */
127
128 Rune    linebuf[LBSIZE+1];              /* Input data buffer; also holds each script line during fcomp */
129 Rune    *lbend = linebuf+LBSIZE;        /* End of buffer (last element, kept for the NUL) */
130 Rune    *spend = linebuf;               /* End of input data */
131 Rune    *cp;                            /* Current scan point in linebuf */
132
133 Rune    holdsp[LBSIZE+1];               /* Hold buffer */
134 Rune    *hend = holdsp+LBSIZE;          /* End of hold buffer */
135 Rune    *hspend = holdsp;               /* End of hold data */
136
137 int     nflag;                          /* -n: execute() skips the default output of each line */
138 int     gflag;                          /* -g: every s command gets its global flag set */
139
140 int     dolflag;                        /* Set when at true EOF */
141 int     sflag;                          /* Set when substitution done */
142 int     jflag;                          /* Set when jump required */
143 int     delflag;                        /* Delete current line when set */
144
145 long    lnum = 0;                       /* Input line count */
146
147 char    fname[MAXFILES][40];            /* File name cache; text() writes names here without a length check */
148 Biobuf  *fcode[MAXFILES];               /* File ID cache, parallel to fname */
149 int     nfiles = 0;                     /* Cache fill point */
150
151 Biobuf  fout;                           /* Output stream (fd 1, set up in main) */
152 Biobuf  stdin;                          /* Default input */
153 Biobuf* f = 0;                          /* Input data */
154
155 Label   ltab[LABSIZE];                  /* Label name symbol table */
156 Label   *labend = ltab+LABSIZE;         /* End of label table */
157 Label   *lab = ltab+1;                  /* Current Fill point; ltab[0] collects label-less branches */
158
159 int     depth = 0;                      /* {} stack pointer */
160
161 Rune    bad;                            /* Dummy err ptr reference */
162 Rune    *badp = &bad;
163
164
/*
 * Format strings handed to quit().  The arguments supplied for %S in
 * this file are Rune strings (linebuf, or literals such as L"a").
 */
165 char    CGMES[]  =      "%S command garbled: %S";
166 char    TMMES[]  =      "Too much text: %S";
167 char    LTL[]    =      "Label too long: %S";
168 char    AD0MES[] =      "No addresses allowed: %S";
169 char    AD1MES[] =      "Only one address allowed: %S";
170
/* Forward declarations for the functions defined later in this file. */
171 void    address(Addr *);
172 void    arout(void);
173 int     cmp(char *, char *);
174 int     rcmp(Rune *, Rune *);
175 void    command(SedCom *);
176 Reprog  *compile(void);
177 Rune    *compsub(Rune *, Rune *);
178 void    dechain(void);
179 void    dosub(Rune *);
180 int     ecmp(Rune *, Rune *, int);
181 void    enroll(char *);
182 void    errexit(void);
183 int     executable(SedCom *);
184 void    execute(void);
185 void    fcomp(void);
186 long    getrune(void);
187 Rune    *gline(Rune *);
188 int     match(Reprog *, Rune *);
189 void    newfile(enum PTYPE, char *);
190 int     opendata(void);
191 Biobuf  *open_file(char *);
192 Rune    *place(Rune *, Rune *, Rune *);
193 void    quit(char *, ...);
194 int     rline(Rune *, Rune *);
195 Label   *search(Label *);
196 int     substitute(SedCom *);
197 char    *text(char *);
198 Rune    *stext(Rune *, Rune *);
199 int     ycomp(SedCom *);
200 char *  trans(int c);
201 void    putline(Biobuf *bp, Rune *buf, int n);
202
203 void
204 main(int argc, char **argv)
205 {
206         int compfl;
207
208         lnum = 0;
209         Binit(&fout, 1, OWRITE);
210         fcode[nfiles++] = &fout;
211         compfl = 0;
212
213         if(argc == 1)
214                 exits(0);
215         ARGBEGIN{
216         case 'e':
217                 if (argc <= 1)
218                         quit("missing pattern");
219                 newfile(P_ARG, ARGF());
220                 fcomp();
221                 compfl = 1;
222                 continue;
223         case 'f':
224                 if(argc <= 1)
225                         quit("no pattern-file");
226                 newfile(P_FILE, ARGF());
227                 fcomp();
228                 compfl = 1;
229                 continue;
230         case 'g':
231                 gflag++;
232                 continue;
233         case 'n':
234                 nflag++;
235                 continue;
236         default:
237                 fprint(2, "sed: Unknown flag: %c\n", ARGC());
238                 continue;
239         } ARGEND
240
241         if(compfl == 0) {
242                 if (--argc < 0)
243                         quit("missing pattern");
244                 newfile(P_ARG, *argv++);
245                 fcomp();
246         }
247
248         if(depth)
249                 quit("Too many {'s");
250
251         ltab[0].address = rep;
252
253         dechain();
254
255         if(argc <= 0)
256                 enroll(0);              /* Add stdin to cache */
257         else
258                 while(--argc >= 0)
259                         enroll(*argv++);
260         execute();
261         exits(0);
262 }
263
264 void
265 fcomp(void)
266 {
267         int     i;
268         Label   *lpt;
269         Rune    *tp;
270         SedCom  *pt, *pt1;
271         static Rune     *p = addspace;
272         static SedCom   **cmpend[DEPTH];        /* stack of {} operations */
273
274         while (rline(linebuf, lbend) >= 0) {
275                 cp = linebuf;
276 comploop:
277                 while(*cp == L' ' || *cp == L'\t')
278                         cp++;
279                 if(*cp == L'\0' || *cp == L'#')
280                         continue;
281                 if(*cp == L';') {
282                         cp++;
283                         goto comploop;
284                 }
285
286                 address(&rep->ad1);
287                 if (rep->ad1.type != A_NONE) {
288                         if (rep->ad1.type == A_LAST) {
289                                 if (!lastre)
290                                         quit("First RE may not be null");
291                                 rep->ad1.type = A_RE;
292                                 rep->ad1.rp = lastre;
293                         }
294                         if(*cp == L',' || *cp == L';') {
295                                 cp++;
296                                 address(&rep->ad2);
297                                 if (rep->ad2.type == A_LAST) {
298                                         rep->ad2.type = A_RE;
299                                         rep->ad2.rp = lastre;
300                                 }
301                         } else
302                                 rep->ad2.type = A_NONE;
303                 }
304                 while(*cp == L' ' || *cp == L'\t')
305                         cp++;
306
307 swit:
308                 switch(*cp++) {
309                 default:
310                         quit("Unrecognized command: %S", linebuf);
311
312                 case '!':
313                         rep->negfl = 1;
314                         goto swit;
315
316                 case '{':
317                         rep->command = BCOM;
318                         rep->negfl = !rep->negfl;
319                         cmpend[depth++] = &rep->lb1;
320                         if(++rep >= pend)
321                                 quit("Too many commands: %S", linebuf);
322                         if(*cp == '\0')
323                                 continue;
324                         goto comploop;
325
326                 case '}':
327                         if(rep->ad1.type != A_NONE)
328                                 quit(AD0MES, linebuf);
329                         if(--depth < 0)
330                                 quit("Too many }'s");
331                         *cmpend[depth] = rep;
332                         if(*cp == 0)
333                                 continue;
334                         goto comploop;
335
336                 case '=':
337                         rep->command = EQCOM;
338                         if(rep->ad2.type != A_NONE)
339                                 quit(AD1MES, linebuf);
340                         break;
341
342                 case ':':
343                         if(rep->ad1.type != A_NONE)
344                                 quit(AD0MES, linebuf);
345
346                         while(*cp == L' ')
347                                 cp++;
348                         tp = lab->uninm;
349                         while (*cp && *cp != L';' && *cp != L' ' &&
350                             *cp != L'\t' && *cp != L'#') {
351                                 *tp++ = *cp++;
352                                 if(tp >= &lab->uninm[8])
353                                         quit(LTL, linebuf);
354                         }
355                         *tp = L'\0';
356
357                         if (*lab->uninm == L'\0')               /* no label? */
358                                 quit(CGMES, L":", linebuf);
359                         if(lpt = search(lab)) {
360                                 if(lpt->address)
361                                         quit("Duplicate labels: %S", linebuf);
362                         } else {
363                                 lab->chain = 0;
364                                 lpt = lab;
365                                 if(++lab >= labend)
366                                         quit("Too many labels: %S", linebuf);
367                         }
368                         lpt->address = rep;
369                         if (*cp == L'#')
370                                 continue;
371                         rep--;                  /* reuse this slot */
372                         break;
373
374                 case 'a':
375                         rep->command = ACOM;
376                         if(rep->ad2.type != A_NONE)
377                                 quit(AD1MES, linebuf);
378                         if(*cp == L'\\')
379                                 cp++;
380                         if(*cp++ != L'\n')
381                                 quit(CGMES, L"a", linebuf);
382                         rep->text = p;
383                         p = stext(p, addend);
384                         break;
385                 case 'c':
386                         rep->command = CCOM;
387                         if(*cp == L'\\')
388                                 cp++;
389                         if(*cp++ != L'\n')
390                                 quit(CGMES, L"c", linebuf);
391                         rep->text = p;
392                         p = stext(p, addend);
393                         break;
394                 case 'i':
395                         rep->command = ICOM;
396                         if(rep->ad2.type != A_NONE)
397                                 quit(AD1MES, linebuf);
398                         if(*cp == L'\\')
399                                 cp++;
400                         if(*cp++ != L'\n')
401                                 quit(CGMES, L"i", linebuf);
402                         rep->text = p;
403                         p = stext(p, addend);
404                         break;
405
406                 case 'g':
407                         rep->command = GCOM;
408                         break;
409
410                 case 'G':
411                         rep->command = CGCOM;
412                         break;
413
414                 case 'h':
415                         rep->command = HCOM;
416                         break;
417
418                 case 'H':
419                         rep->command = CHCOM;
420                         break;
421
422                 case 't':
423                         rep->command = TCOM;
424                         goto jtcommon;
425
426                 case 'b':
427                         rep->command = BCOM;
428 jtcommon:
429                         while(*cp == L' ')
430                                 cp++;
431                         if(*cp == L'\0' || *cp == L';') {
432                                 /* no label; jump to end */
433                                 if(pt = ltab[0].chain) {
434                                         while((pt1 = pt->lb1) != nil)
435                                                 pt = pt1;
436                                         pt->lb1 = rep;
437                                 } else
438                                         ltab[0].chain = rep;
439                                 break;
440                         }
441
442                         /* copy label into lab->uninm */
443                         tp = lab->uninm;
444                         while((*tp = *cp++) != L'\0' && *tp != L';')
445                                 if(++tp >= &lab->uninm[8])
446                                         quit(LTL, linebuf);
447                         cp--;
448                         *tp = L'\0';
449
450                         if (*lab->uninm == L'\0')
451                                 /* shouldn't get here */
452                                 quit(CGMES, L"b or t", linebuf);
453                         if((lpt = search(lab)) != nil) {
454                                 if(lpt->address)
455                                         rep->lb1 = lpt->address;
456                                 else {
457                                         for(pt = lpt->chain; pt != nil &&
458                                             (pt1 = pt->lb1) != nil; pt = pt1)
459                                                 ;
460                                         if (pt)
461                                                 pt->lb1 = rep;
462                                 }
463                         } else {                        /* add new label */
464                                 lab->chain = rep;
465                                 lab->address = 0;
466                                 if(++lab >= labend)
467                                         quit("Too many labels: %S", linebuf);
468                         }
469                         break;
470
471                 case 'n':
472                         rep->command = NCOM;
473                         break;
474
475                 case 'N':
476                         rep->command = CNCOM;
477                         break;
478
479                 case 'p':
480                         rep->command = PCOM;
481                         break;
482
483                 case 'P':
484                         rep->command = CPCOM;
485                         break;
486
487                 case 'r':
488                         rep->command = RCOM;
489                         if(rep->ad2.type != A_NONE)
490                                 quit(AD1MES, linebuf);
491                         if(*cp++ != L' ')
492                                 quit(CGMES, L"r", linebuf);
493                         rep->text = p;
494                         p = stext(p, addend);
495                         break;
496
497                 case 'd':
498                         rep->command = DCOM;
499                         break;
500
501                 case 'D':
502                         rep->command = CDCOM;
503                         rep->lb1 = pspace;
504                         break;
505
506                 case 'q':
507                         rep->command = QCOM;
508                         if(rep->ad2.type != A_NONE)
509                                 quit(AD1MES, linebuf);
510                         break;
511
512                 case 'l':
513                         rep->command = LCOM;
514                         break;
515
516                 case 's':
517                         rep->command = SCOM;
518                         seof = *cp++;
519                         if ((rep->re1 = compile()) == 0) {
520                                 if(!lastre)
521                                         quit("First RE may not be null.");
522                                 rep->re1 = lastre;
523                         }
524                         rep->rhs = p;
525                         if((p = compsub(p, addend)) == 0)
526                                 quit(CGMES, L"s", linebuf);
527                         if(*cp == L'g') {
528                                 cp++;
529                                 rep->gfl++;
530                         } else if(gflag)
531                                 rep->gfl++;
532
533                         if(*cp == L'p') {
534                                 cp++;
535                                 rep->pfl = 1;
536                         }
537
538                         if(*cp == L'P') {
539                                 cp++;
540                                 rep->pfl = 2;
541                         }
542
543                         if(*cp == L'w') {
544                                 cp++;
545                                 if(*cp++ !=  L' ')
546                                         quit(CGMES, L"s", linebuf);
547                                 text(fname[nfiles]);
548                                 for(i = nfiles - 1; i >= 0; i--)
549                                         if(cmp(fname[nfiles], fname[i]) == 0) {
550                                                 rep->fcode = fcode[i];
551                                                 goto done;
552                                         }
553                                 if(nfiles >= MAXFILES)
554                                         quit("Too many files in w commands 1");
555                                 rep->fcode = open_file(fname[nfiles]);
556                         }
557                         break;
558
559                 case 'w':
560                         rep->command = WCOM;
561                         if(*cp++ != L' ')
562                                 quit(CGMES, L"w", linebuf);
563                         text(fname[nfiles]);
564                         for(i = nfiles - 1; i >= 0; i--)
565                                 if(cmp(fname[nfiles], fname[i]) == 0) {
566                                         rep->fcode = fcode[i];
567                                         goto done;
568                                 }
569                         if(nfiles >= MAXFILES){
570                                 fprint(2, "sed: Too many files in w commands 2 \n");
571                                 fprint(2, "nfiles = %d; MAXF = %d\n",
572                                         nfiles, MAXFILES);
573                                 errexit();
574                         }
575                         rep->fcode = open_file(fname[nfiles]);
576                         break;
577
578                 case 'x':
579                         rep->command = XCOM;
580                         break;
581
582                 case 'y':
583                         rep->command = YCOM;
584                         seof = *cp++;
585                         if (ycomp(rep) == 0)
586                                 quit(CGMES, L"y", linebuf);
587                         break;
588
589                 }
590 done:
591                 if(++rep >= pend)
592                         quit("Too many commands, last: %S", linebuf);
593                 if(*cp++ != L'\0') {
594                         if(cp[-1] == L';')
595                                 goto comploop;
596                         quit(CGMES, cp - 1, linebuf);
597                 }
598         }
599 }
600
601 Biobuf *
602 open_file(char *name)
603 {
604         int fd;
605         Biobuf *bp;
606
607         if ((bp = malloc(sizeof(Biobuf))) == 0)
608                 quit("Out of memory");
609         if ((fd = open(name, OWRITE)) < 0 &&
610             (fd = create(name, OWRITE, 0666)) < 0)
611                 quit("Cannot create %s", name);
612         Binit(bp, fd, OWRITE);
613         Bseek(bp, 0, 2);
614         fcode[nfiles++] = bp;
615         return bp;
616 }
617
618 Rune *
619 compsub(Rune *rhs, Rune *end)
620 {
621         Rune r;
622
623         while ((r = *cp++) != '\0') {
624                 if(r == '\\') {
625                         if (rhs < end)
626                                 *rhs++ = 0xFFFF;
627                         else
628                                 return 0;
629                         r = *cp++;
630                         if(r == 'n')
631                                 r = '\n';
632                 } else {
633                         if(r == seof) {
634                                 if (rhs < end)
635                                         *rhs++ = '\0';
636                                 else
637                                         return 0;
638                                 return rhs;
639                         }
640                 }
641                 if (rhs < end)
642                         *rhs++ = r;
643                 else
644                         return 0;
645         }
646         return 0;
647 }
648
649 Reprog *
650 compile(void)
651 {
652         Rune c;
653         char *ep;
654         char expbuf[512];
655
656         if((c = *cp++) == seof)         /* L'//' */
657                 return 0;
658         ep = expbuf;
659         do {
660                 if (c == L'\0' || c == L'\n')
661                         quit(TMMES, linebuf);
662                 if (c == L'\\') {
663                         if (ep >= expbuf+sizeof(expbuf))
664                                 quit(TMMES, linebuf);
665                         ep += runetochar(ep, &c);
666                         if ((c = *cp++) == L'n')
667                                 c = L'\n';
668                 }
669                 if (ep >= expbuf + sizeof(expbuf))
670                         quit(TMMES, linebuf);
671                 ep += runetochar(ep, &c);
672         } while ((c = *cp++) != seof);
673         *ep = 0;
674         return lastre = regcomp(expbuf);
675 }
676
/*
 * regerror -- report a bad regular expression and exit through quit().
 * The message s is deliberately unused; the script line in linebuf is
 * printed instead.  Presumably this is the hook the regexp library
 * calls when regcomp() rejects an expression -- confirm against
 * regexp(2).
 */
677 void
678 regerror(char *s)
679 {
680         USED(s);
681         quit(CGMES, L"r.e.-using", linebuf);
682 }
683
684 void
685 newfile(enum PTYPE type, char *name)
686 {
687         if (type == P_ARG)
688                 prog.curr = name;
689         else if ((prog.bp = Bopen(name, OREAD)) == 0)
690                 quit("Cannot open pattern-file: %s\n", name);
691         prog.type = type;
692 }
693
694 int
695 rline(Rune *buf, Rune *end)
696 {
697         long c;
698         Rune r;
699
700         while ((c = getrune()) >= 0) {
701                 r = c;
702                 if (r == '\\') {
703                         if (buf <= end)
704                                 *buf++ = r;
705                         if ((c = getrune()) < 0)
706                                 break;
707                         r = c;
708                 } else if (r == '\n') {
709                         *buf = '\0';
710                         return 1;
711                 }
712                 if (buf <= end)
713                         *buf++ = r;
714         }
715         *buf = '\0';
716         return -1;
717 }
718
719 long
720 getrune(void)
721 {
722         long c;
723         Rune r;
724         char *p;
725
726         if (prog.type == P_ARG) {
727                 if ((p = prog.curr) != 0) {
728                         if (*p) {
729                                 prog.curr += chartorune(&r, p);
730                                 c = r;
731                         } else {
732                                 c = '\n';       /* fake an end-of-line */
733                                 prog.curr = 0;
734                         }
735                 } else
736                         c = -1;
737         } else if ((c = Bgetrune(prog.bp)) < 0)
738                 Bterm(prog.bp);
739         return c;
740 }
741
742 void
743 address(Addr *ap)
744 {
745         int c;
746         long lno;
747
748         if((c = *cp++) == '$')
749                 ap->type = A_DOL;
750         else if(c == '/') {
751                 seof = c;
752                 if (ap->rp = compile())
753                         ap->type = A_RE;
754                 else
755                         ap->type = A_LAST;
756         }
757         else if (c >= '0' && c <= '9') {
758                 lno = c - '0';
759                 while ((c = *cp) >= '0' && c <= '9')
760                         lno = lno*10 + *cp++ - '0';
761                 if(!lno)
762                         quit("line number 0 is illegal",0);
763                 ap->type = A_LINE;
764                 ap->line = lno;
765         }
766         else {
767                 cp--;
768                 ap->type = A_NONE;
769         }
770 }
771
/*
 * cmp -- compare two NUL-terminated byte strings.
 * Returns 0 when they are identical and 1 otherwise (no ordering).
 * The return type is now spelled out; it used to rely on implicit
 * int, which C99 and later no longer allow.
 */
int
cmp(char *a, char *b)		/* compare characters */
{
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return 1;
}
781 rcmp(Rune *a, Rune *b)          /* compare runes */
782 {
783         while(*a == *b++)
784                 if (*a == '\0')
785                         return 0;
786                 else
787                         a++;
788         return 1;
789 }
790
791 char *
792 text(char *p)           /* extract character string */
793 {
794         Rune r;
795
796         while(*cp == ' ' || *cp == '\t')
797                 cp++;
798         while (*cp) {
799                 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
800                         break;
801                 if (r == '\n')
802                         while (*cp == ' ' || *cp == '\t')
803                                 cp++;
804                 p += runetochar(p, &r);
805         }
806         *p++ = '\0';
807         return p;
808 }
809
810 Rune *
811 stext(Rune *p, Rune *end)               /* extract rune string */
812 {
813         while(*cp == L' ' || *cp == L'\t')
814                 cp++;
815         while (*cp) {
816                 if (*cp == L'\\' && *++cp == L'\0')
817                         break;
818                 if (p >= end-1)
819                         quit(TMMES, linebuf);
820                 if ((*p++ = *cp++) == L'\n')
821                         while(*cp == L' ' || *cp == L'\t')
822                                 cp++;
823         }
824         *p++ = 0;
825         return p;
826 }
827
828
829 Label *
830 search(Label *ptr)
831 {
832         Label   *rp;
833
834         for (rp = ltab; rp < ptr; rp++)
835                 if(rcmp(rp->uninm, ptr->uninm) == 0)
836                         return(rp);
837         return(0);
838 }
839
840 void
841 dechain(void)
842 {
843         Label   *lptr;
844         SedCom  *rptr, *trptr;
845
846         for(lptr = ltab; lptr < lab; lptr++) {
847                 if(lptr->address == 0)
848                         quit("Undefined label: %S", lptr->uninm);
849                 if(lptr->chain) {
850                         rptr = lptr->chain;
851                         while((trptr = rptr->lb1) != nil) {
852                                 rptr->lb1 = lptr->address;
853                                 rptr = trptr;
854                         }
855                         rptr->lb1 = lptr->address;
856                 }
857         }
858 }
859
860 int
861 ycomp(SedCom *r)
862 {
863         int i;
864         Rune *rp, *sp, *tsp;
865         Rune c, highc;
866
867         highc = 0;
868         for(tsp = cp; *tsp != seof; tsp++) {
869                 if(*tsp == L'\\')
870                         tsp++;
871                 if(*tsp == L'\n' || *tsp == L'\0')
872                         return 0;
873                 if (*tsp > highc)
874                         highc = *tsp;
875         }
876         tsp++;
877         if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
878                 quit("Out of memory");
879         *rp++ = highc;                          /* save upper bound */
880         for (i = 0; i <= highc; i++)
881                 rp[i] = i;
882         sp = cp;
883         while((c = *sp++) != seof) {
884                 if(c == L'\\' && *sp == L'n') {
885                         sp++;
886                         c = L'\n';
887                 }
888                 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
889                         rp[c] = L'\n';
890                         tsp++;
891                 }
892                 if(rp[c] == seof || rp[c] == L'\0') {
893                         free(r->re1);
894                         r->re1 = nil;
895                         return 0;
896                 }
897         }
898         if(*tsp != seof) {
899                 free(r->re1);
900                 r->re1 = nil;
901                 return 0;
902         }
903         cp = tsp+1;
904         return 1;
905 }
906
907 void
908 execute(void)
909 {
910         SedCom  *ipc;
911
912         while (spend = gline(linebuf)){
913                 for(ipc = pspace; ipc->command; ) {
914                         if (!executable(ipc)) {
915                                 ipc++;
916                                 continue;
917                         }
918                         command(ipc);
919
920                         if(delflag)
921                                 break;
922                         if(jflag) {
923                                 jflag = 0;
924                                 if((ipc = ipc->lb1) == 0)
925                                         break;
926                         } else
927                                 ipc++;
928                 }
929                 if(!nflag && !delflag)
930                         putline(&fout, linebuf, spend - linebuf);
931                 if(aptr > abuf)
932                         arout();
933                 delflag = 0;
934         }
935 }
936
937 /* determine if a statement should be applied to an input line */
938 int
939 executable(SedCom *ipc)
940 {
941         if (ipc->active) {      /* Addr1 satisfied - accept until Addr2 */
942                 if (ipc->active == 1)           /* Second line */
943                         ipc->active = 2;
944                 switch(ipc->ad2.type) {
945                 case A_NONE:            /* No second addr; use first */
946                         ipc->active = 0;
947                         break;
948                 case A_DOL:             /* Accept everything */
949                         return !ipc->negfl;
950                 case A_LINE:            /* Line at end of range? */
951                         if (lnum <= ipc->ad2.line) {
952                                 if (ipc->ad2.line == lnum)
953                                         ipc->active = 0;
954                                 return !ipc->negfl;
955                         }
956                         ipc->active = 0;        /* out of range */
957                         return ipc->negfl;
958                 case A_RE:              /* Check for matching R.E. */
959                         if (match(ipc->ad2.rp, linebuf))
960                                 ipc->active = 0;
961                         return !ipc->negfl;
962                 default:
963                         quit("Internal error");
964                 }
965         }
966         switch (ipc->ad1.type) {        /* Check first address */
967         case A_NONE:                    /* Everything matches */
968                 return !ipc->negfl;
969         case A_DOL:                     /* Only last line */
970                 if (dolflag)
971                         return !ipc->negfl;
972                 break;
973         case A_LINE:                    /* Check line number */
974                 if (ipc->ad1.line == lnum) {
975                         ipc->active = 1;        /* In range */
976                         return !ipc->negfl;
977                 }
978                 break;
979         case A_RE:                      /* Check R.E. */
980                 if (match(ipc->ad1.rp, linebuf)) {
981                         ipc->active = 1;        /* In range */
982                         return !ipc->negfl;
983                 }
984                 break;
985         default:
986                 quit("Internal error");
987         }
988         return ipc->negfl;
989 }
990
991 int
992 match(Reprog *pattern, Rune *buf)
993 {
994         if (!pattern)
995                 return 0;
996         subexp[0].rsp = buf;
997         subexp[0].ep = 0;
998         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
999                 loc1 = subexp[0].rsp;
1000                 loc2 = subexp[0].rep;
1001                 return 1;
1002         }
1003         loc1 = loc2 = 0;
1004         return 0;
1005 }
1006
1007 int
1008 substitute(SedCom *ipc)
1009 {
1010         int len;
1011
1012         if(!match(ipc->re1, linebuf))
1013                 return 0;
1014
1015         /*
1016          * we have at least one match.  some patterns, e.g. '$' or '^', can
1017          * produce 0-length matches, so during a global substitute we must
1018          * bump to the character after a 0-length match to keep from looping.
1019          */
1020         sflag = 1;
1021         if(ipc->gfl == 0)                       /* single substitution */
1022                 dosub(ipc->rhs);
1023         else
1024                 do{                             /* global substitution */
1025                         len = loc2 - loc1;      /* length of match */
1026                         dosub(ipc->rhs);        /* dosub moves loc2 */
1027                         if(*loc2 == 0)          /* end of string */
1028                                 break;
1029                         if(len == 0)            /* zero-length R.E. match */
1030                                 loc2++;         /* bump over 0-length match */
1031                         if(*loc2 == 0)          /* end of string */
1032                                 break;
1033                 } while(match(ipc->re1, loc2));
1034         return 1;
1035 }
1036
1037 void
1038 dosub(Rune *rhsbuf)
1039 {
1040         int c, n;
1041         Rune *lp, *sp, *rp;
1042
1043         lp = linebuf;
1044         sp = genbuf;
1045         rp = rhsbuf;
1046         while (lp < loc1)
1047                 *sp++ = *lp++;
1048         while(c = *rp++) {
1049                 if (c == '&') {
1050                         sp = place(sp, loc1, loc2);
1051                         continue;
1052                 }
1053                 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1054                         n = c-'0';
1055                         if (subexp[n].rsp && subexp[n].rep) {
1056                                 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1057                                 continue;
1058                         }
1059                         else {
1060                                 fprint(2, "sed: Invalid back reference \\%d\n",n);
1061                                 errexit();
1062                         }
1063                 }
1064                 *sp++ = c;
1065                 if (sp >= &genbuf[LBSIZE])
1066                         fprint(2, "sed: Output line too long.\n");
1067         }
1068         lp = loc2;
1069         loc2 = sp - genbuf + linebuf;
1070         while (*sp++ = *lp++)
1071                 if (sp >= &genbuf[LBSIZE])
1072                         fprint(2, "sed: Output line too long.\n");
1073         lp = linebuf;
1074         sp = genbuf;
1075         while (*lp++ = *sp++)
1076                 ;
1077         spend = lp - 1;
1078 }
1079
1080 Rune *
1081 place(Rune *sp, Rune *l1, Rune *l2)
1082 {
1083         while (l1 < l2) {
1084                 *sp++ = *l1++;
1085                 if (sp >= &genbuf[LBSIZE])
1086                         fprint(2, "sed: Output line too long.\n");
1087         }
1088         return sp;
1089 }
1090
/*
 * Return a printable escape for c.
 *
 * Backspace, newline, carriage return, tab and backslash map to the
 * two-character escapes \b \n \r \t \\ .  Anything else is rendered
 * as \x followed by four lower-case hex digits taken from the low
 * 16 bits of c.
 *
 * The hex form is built in a static buffer, so the result is
 * overwritten by the next call that takes that path.
 */
char *
trans(int c)
{
	static char esc[] = "\\x0000";
	static char digits[] = "0123456789abcdef";
	int k;

	if(c == '\b')
		return "\\b";
	if(c == '\n')
		return "\\n";
	if(c == '\r')
		return "\\r";
	if(c == '\t')
		return "\\t";
	if(c == '\\')
		return "\\\\";

	/* fill the four digits from least to most significant nibble */
	for(k = 0; k < 4; k++)
		esc[5-k] = digits[(c >> (4*k)) & 0xF];
	return esc;
}
1115
1116 void
1117 command(SedCom *ipc)
1118 {
1119         int i, c;
1120         char *ucp;
1121         Rune *execp, *p1, *p2, *rp;
1122
1123         switch(ipc->command) {
1124         case ACOM:
1125                 *aptr++ = ipc;
1126                 if(aptr >= abuf+MAXADDS)
1127                         quit("sed: Too many appends after line %ld\n",
1128                                 (char *)lnum);
1129                 *aptr = 0;
1130                 break;
1131         case CCOM:
1132                 delflag = 1;
1133                 if(ipc->active == 1) {
1134                         for(rp = ipc->text; *rp; rp++)
1135                                 Bputrune(&fout, *rp);
1136                         Bputc(&fout, '\n');
1137                 }
1138                 break;
1139         case DCOM:
1140                 delflag++;
1141                 break;
1142         case CDCOM:
1143                 p1 = p2 = linebuf;
1144                 while(*p1 != '\n') {
1145                         if(*p1++ == 0) {
1146                                 delflag++;
1147                                 return;
1148                         }
1149                 }
1150                 p1++;
1151                 while(*p2++ = *p1++)
1152                         ;
1153                 spend = p2 - 1;
1154                 jflag++;
1155                 break;
1156         case EQCOM:
1157                 Bprint(&fout, "%ld\n", lnum);
1158                 break;
1159         case GCOM:
1160                 p1 = linebuf;
1161                 p2 = holdsp;
1162                 while(*p1++ = *p2++)
1163                         ;
1164                 spend = p1 - 1;
1165                 break;
1166         case CGCOM:
1167                 *spend++ = '\n';
1168                 p1 = spend;
1169                 p2 = holdsp;
1170                 while(*p1++ = *p2++)
1171                         if(p1 >= lbend)
1172                                 break;
1173                 spend = p1 - 1;
1174                 break;
1175         case HCOM:
1176                 p1 = holdsp;
1177                 p2 = linebuf;
1178                 while(*p1++ = *p2++);
1179                 hspend = p1 - 1;
1180                 break;
1181         case CHCOM:
1182                 *hspend++ = '\n';
1183                 p1 = hspend;
1184                 p2 = linebuf;
1185                 while(*p1++ = *p2++)
1186                         if(p1 >= hend)
1187                                 break;
1188                 hspend = p1 - 1;
1189                 break;
1190         case ICOM:
1191                 for(rp = ipc->text; *rp; rp++)
1192                         Bputrune(&fout, *rp);
1193                 Bputc(&fout, '\n');
1194                 break;
1195         case BCOM:
1196                 jflag = 1;
1197                 break;
1198         case LCOM:
1199                 c = 0;
1200                 for (i = 0, rp = linebuf; *rp; rp++) {
1201                         c = *rp;
1202                         if(c >= 0x20 && c < 0x7F && c != '\\') {
1203                                 Bputc(&fout, c);
1204                                 if(i++ > 71) {
1205                                         Bprint(&fout, "\\\n");
1206                                         i = 0;
1207                                 }
1208                         } else {
1209                                 for (ucp = trans(*rp); *ucp; ucp++){
1210                                         c = *ucp;
1211                                         Bputc(&fout, c);
1212                                         if(i++ > 71) {
1213                                                 Bprint(&fout, "\\\n");
1214                                                 i = 0;
1215                                         }
1216                                 }
1217                         }
1218                 }
1219                 if(c == ' ')
1220                         Bprint(&fout, "\\n");
1221                 Bputc(&fout, '\n');
1222                 break;
1223         case NCOM:
1224                 if(!nflag)
1225                         putline(&fout, linebuf, spend-linebuf);
1226
1227                 if(aptr > abuf)
1228                         arout();
1229                 if((execp = gline(linebuf)) == 0) {
1230                         delflag = 1;
1231                         break;
1232                 }
1233                 spend = execp;
1234                 break;
1235         case CNCOM:
1236                 if(aptr > abuf)
1237                         arout();
1238                 *spend++ = '\n';
1239                 if((execp = gline(spend)) == 0) {
1240                         delflag = 1;
1241                         break;
1242                 }
1243                 spend = execp;
1244                 break;
1245         case PCOM:
1246                 putline(&fout, linebuf, spend-linebuf);
1247                 break;
1248         case CPCOM:
1249 cpcom:
1250                 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1251                         Bputc(&fout, *rp);
1252                 Bputc(&fout, '\n');
1253                 break;
1254         case QCOM:
1255                 if(!nflag)
1256                         putline(&fout, linebuf, spend-linebuf);
1257                 if(aptr > abuf)
1258                         arout();
1259                 exits(0);
1260         case RCOM:
1261                 *aptr++ = ipc;
1262                 if(aptr >= &abuf[MAXADDS])
1263                         quit("sed: Too many reads after line %ld\n",
1264                                 (char *)lnum);
1265                 *aptr = 0;
1266                 break;
1267         case SCOM:
1268                 i = substitute(ipc);
1269                 if(i && ipc->pfl)
1270                         if(ipc->pfl == 1)
1271                                 putline(&fout, linebuf, spend-linebuf);
1272                         else
1273                                 goto cpcom;
1274                 if(i && ipc->fcode)
1275                         goto wcom;
1276                 break;
1277
1278         case TCOM:
1279                 if(sflag) {
1280                         sflag = 0;
1281                         jflag = 1;
1282                 }
1283                 break;
1284
1285         case WCOM:
1286 wcom:
1287                 putline(ipc->fcode,linebuf, spend - linebuf);
1288                 break;
1289         case XCOM:
1290                 p1 = linebuf;
1291                 p2 = genbuf;
1292                 while(*p2++ = *p1++)
1293                         ;
1294                 p1 = holdsp;
1295                 p2 = linebuf;
1296                 while(*p2++ = *p1++)
1297                         ;
1298                 spend = p2 - 1;
1299                 p1 = genbuf;
1300                 p2 = holdsp;
1301                 while(*p2++ = *p1++)
1302                         ;
1303                 hspend = p2 - 1;
1304                 break;
1305         case YCOM:
1306                 p1 = linebuf;
1307                 p2 = ipc->text;
1308                 for (i = *p2++; *p1; p1++)
1309                         if (*p1 <= i)
1310                                 *p1 = p2[*p1];
1311                 break;
1312         }
1313 }
1314
1315 void
1316 putline(Biobuf *bp, Rune *buf, int n)
1317 {
1318         while (n--)
1319                 Bputrune(bp, *buf++);
1320         Bputc(bp, '\n');
1321 }
1322 ecmp(Rune *a, Rune *b, int count)
1323 {
1324         while(count--)
1325                 if(*a++ != *b++)
1326                         return 0;
1327         return 1;
1328 }
1329
1330 void
1331 arout(void)
1332 {
1333         int     c;
1334         char    *s;
1335         char    buf[128];
1336         Rune    *p1;
1337         Biobuf  *fi;
1338
1339         for (aptr = abuf; *aptr; aptr++) {
1340                 if((*aptr)->command == ACOM) {
1341                         for(p1 = (*aptr)->text; *p1; p1++ )
1342                                 Bputrune(&fout, *p1);
1343                         Bputc(&fout, '\n');
1344                 } else {
1345                         for(s = buf, p1 = (*aptr)->text; *p1; p1++)
1346                                 s += runetochar(s, p1);
1347                         *s = '\0';
1348                         if((fi = Bopen(buf, OREAD)) == 0)
1349                                 continue;
1350                         while((c = Bgetc(fi)) >= 0)
1351                                 Bputc(&fout, c);
1352                         Bterm(fi);
1353                 }
1354         }
1355         aptr = abuf;
1356         *aptr = 0;
1357 }
1358
/*
 * Terminate the program with the exit status string "error".
 */
void
errexit(void)
{
	char *status;

	status = "error";
	exits(status);
}
1364
1365 void
1366 quit(char *fmt, ...)
1367 {
1368         char *p, *ep;
1369         char msg[256];
1370         va_list arg;
1371
1372         ep = msg + sizeof msg;
1373         p = seprint(msg, ep, "sed: ");
1374         va_start(arg, fmt);
1375         p = vseprint(p, ep, fmt, arg);
1376         va_end(arg);
1377         p = seprint(p, ep, "\n");
1378         write(2, msg, p - msg);
1379         errexit();
1380 }
1381
1382 Rune *
1383 gline(Rune *addr)
1384 {
1385         long c;
1386         Rune *p;
1387         static long peekc = 0;
1388
1389         if (f == 0 && opendata() < 0)
1390                 return 0;
1391         sflag = 0;
1392         lnum++;
1393 /*      Bflush(&fout);********* dumped 4/30/92 - bobf****/
1394         do {
1395                 p = addr;
1396                 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1397                         if (c == '\n') {
1398                                 if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
1399                                         dolflag = 1;
1400                                 *p = '\0';
1401                                 return p;
1402                         }
1403                         if (c && p < lbend)
1404                                 *p++ = c;
1405                 }
1406                 /* return partial final line, adding implicit newline */
1407                 if(p != addr) {
1408                         *p = '\0';
1409                         peekc = -1;
1410                         if (fhead == 0)
1411                                 dolflag = 1;
1412                         return p;
1413                 }
1414                 peekc = 0;
1415                 Bterm(f);
1416         } while (opendata() > 0);               /* Switch to next stream */
1417         f = 0;
1418         return 0;
1419 }
1420
1421 /*
1422  * Data file input section - the intent is to transparently
1423  *      catenate all data input streams.
1424  */
1425 void
1426 enroll(char *filename)          /* Add a file to the input file cache */
1427 {
1428         FileCache *fp;
1429
1430         if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1431                 quit("Out of memory");
1432         if (ftail == nil)
1433                 fhead = fp;
1434         else
1435                 ftail->next = fp;
1436         ftail = fp;
1437         fp->next = nil;
1438         fp->name = filename;            /* 0 => stdin */
1439 }
1440
1441 int
1442 opendata(void)
1443 {
1444         if (fhead == nil)
1445                 return -1;
1446         if (fhead->name) {
1447                 if ((f = Bopen(fhead->name, OREAD)) == nil)
1448                         quit("Can't open %s", fhead->name);
1449         } else {
1450                 Binit(&stdin, 0, OREAD);
1451                 f = &stdin;
1452         }
1453         fhead = fhead->next;
1454         return 1;
1455 }