]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/sed.c
ndb/dns: double Maxretries for long cname redirection chains
[plan9front.git] / sys / src / cmd / sed.c
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
9 enum {
10         DEPTH           = 20,           /* max nesting depth of {} */
11         MAXCMDS         = 512,          /* max sed commands */
12         ADDSIZE         = 10000,        /* size of add & read buffer */
13         MAXADDS         = 20,           /* max pending adds and reads */
14         LBSIZE          = 8192,         /* input line size */
15         LABSIZE         = 50,           /* max number of labels */
16         MAXSUB          = 10,           /* max number of sub reg exp */
17         MAXFILES        = 120,          /* max output files */
18 };
19
20 /*
21  * An address is a line #, a R.E., "$", a reference to the last
22  * R.E., or nothing.
23  */
24 typedef struct {
25         enum {
26                 A_NONE,
27                 A_DOL,
28                 A_LINE,
29                 A_RE,
30                 A_LAST,
31         }type;
32         union {
33                 long    line;           /* Line # */
34                 Reprog  *rp;            /* Compiled R.E. */
35         };
36 } Addr;
37
38 typedef struct  SEDCOM {
39         Addr    ad1;                    /* optional start address */
40         Addr    ad2;                    /* optional end address */
41         union {
42                 Reprog  *re1;           /* compiled R.E. */
43                 Rune    *text;          /* added text or file name */
44                 struct  SEDCOM  *lb1;   /* destination command of branch */
45         };
46         Rune    *rhs;                   /* Right-hand side of substitution */
47         Biobuf* fcode;                  /* File ID for read and write */
48         char    command;                /* command code -see below */
49         char    gfl;                    /* 'Global' flag for substitutions */
50         char    pfl;                    /* 'print' flag for substitutions */
51         char    active;                 /* 1 => data between start and end */
52         char    negfl;                  /* negation flag */
53 } SedCom;
54
55 /* Command Codes for field SedCom.command */
56 #define ACOM    01
57 #define BCOM    020
58 #define CCOM    02
59 #define CDCOM   025
60 #define CNCOM   022
61 #define COCOM   017
62 #define CPCOM   023
63 #define DCOM    03
64 #define ECOM    015
65 #define EQCOM   013
66 #define FCOM    016
67 #define GCOM    027
68 #define CGCOM   030
69 #define HCOM    031
70 #define CHCOM   032
71 #define ICOM    04
72 #define LCOM    05
73 #define NCOM    012
74 #define PCOM    010
75 #define QCOM    011
76 #define RCOM    06
77 #define SCOM    07
78 #define TCOM    021
79 #define WCOM    014
80 #define CWCOM   024
81 #define YCOM    026
82 #define XCOM    033
83
84 typedef struct label {                  /* Label symbol table */
85         Rune    uninm[9];               /* Label name */
86         SedCom  *chain;
87         SedCom  *address;               /* Command associated with label */
88 } Label;
89
90 typedef struct  FILE_CACHE {            /* Data file control block */
91         struct FILE_CACHE *next;        /* Forward Link */
92         char    *name;                  /* Name of file */
93 } FileCache;
94
95 SedCom pspace[MAXCMDS];                 /* Command storage */
96 SedCom *pend = pspace+MAXCMDS;          /* End of command storage */
97 SedCom *rep = pspace;                   /* Current fill point */
98
99 Reprog  *lastre;                        /* Last regular expression */
100 Resub   subexp[MAXSUB];                 /* sub-patterns of pattern match*/
101
102 Rune    addspace[ADDSIZE];              /* Buffer for a, c, & i commands */
103 Rune    *addend = addspace+ADDSIZE;
104
105 SedCom  *abuf[MAXADDS];                 /* Queue of pending adds & reads */
106 SedCom  **aptr = abuf;
107
108 struct {                                /* Sed program input control block */
109         enum PTYPE {                    /* Either on command line or in file */
110                 P_ARG,
111                 P_FILE,
112         } type;
113         union PCTL {                    /* Pointer to data */
114                 Biobuf  *bp;
115                 char    *curr;
116         };
117 } prog;
118
119 Rune    genbuf[LBSIZE+1];               /* Miscellaneous buffer */
120
121 FileCache       *fhead;                 /* Head of File Cache Chain */
122 FileCache       *ftail;                 /* Tail of File Cache Chain */
123
124 Rune    *loc1;                          /* Start of pattern match */
125 Rune    *loc2;                          /* End of pattern match */
126 Rune    seof;                           /* Pattern delimiter char */
127
128 Rune    linebuf[LBSIZE+1];              /* Input data buffer */
129 Rune    *lbend = linebuf+LBSIZE;        /* End of buffer */
130 Rune    *spend = linebuf;               /* End of input data */
131 Rune    *cp;                            /* Current scan point in linebuf */
132
133 Rune    holdsp[LBSIZE+1];               /* Hold buffer */
134 Rune    *hend = holdsp+LBSIZE;          /* End of hold buffer */
135 Rune    *hspend = holdsp;               /* End of hold data */
136
137 int     nflag;                          /* Command line flags */
138 int     gflag;
139
140 int     dolflag;                        /* Set when at true EOF */
141 int     sflag;                          /* Set when substitution done */
142 int     jflag;                          /* Set when jump required */
143 int     delflag;                        /* Delete current line when set */
144
145 long    lnum;                           /* Input line count */
146
147 char    fname[MAXFILES][40];            /* File name cache */
148 Biobuf  *fcode[MAXFILES];               /* File ID cache */
149 int     nfiles;                         /* Cache fill point */
150
151 Biobuf  fout;                           /* Output stream */
152 Biobuf  stdin;                          /* Default input */
153 Biobuf* f;                              /* Input data */
154
155 Label   ltab[LABSIZE];                  /* Label name symbol table */
156 Label   *labend = ltab+LABSIZE;         /* End of label table */
157 Label   *lab = ltab+1;                  /* Current Fill point */
158
159 int     depth;                          /* {} stack pointer */
160
161 Rune    bad;                            /* Dummy err ptr reference */
162 Rune    *badp = &bad;
163
164
165 char    CGMES[]  =      "%S command garbled: %S";
166 char    TMMES[]  =      "Too much text: %S";
167 char    LTL[]    =      "Label too long: %S";
168 char    AD0MES[] =      "No addresses allowed: %S";
169 char    AD1MES[] =      "Only one address allowed: %S";
170
171 void    address(Addr *);
172 void    arout(void);
173 int     cmp(char *, char *);
174 int     rcmp(Rune *, Rune *);
175 void    command(SedCom *);
176 Reprog  *compile(void);
177 Rune    *compsub(Rune *, Rune *);
178 void    dechain(void);
179 void    dosub(Rune *);
180 void    enroll(char *);
181 void    errexit(void);
182 int     executable(SedCom *);
183 void    execute(void);
184 void    fcomp(void);
185 long    getrune(void);
186 Rune    *gline(Rune *);
187 int     match(Reprog *, Rune *);
188 void    newfile(enum PTYPE, char *);
189 int     opendata(void);
190 Biobuf  *open_file(char *);
191 Rune    *place(Rune *, Rune *, Rune *);
192 void    quit(char *, ...);
193 int     rline(Rune *, Rune *);
194 Label   *search(Label *);
195 int     substitute(SedCom *);
196 char    *text(char *);
197 Rune    *stext(Rune *, Rune *);
198 int     ycomp(SedCom *);
199 char *  trans(int c);
200 void    putline(Biobuf *bp, Rune *buf, int n);
201
202 void
203 main(int argc, char **argv)
204 {
205         int compfl;
206
207         lnum = 0;
208         Binit(&fout, 1, OWRITE);
209         Blethal(&fout, nil);
210         fcode[nfiles++] = &fout;
211         compfl = 0;
212
213         if(argc == 1)
214                 exits(nil);
215         ARGBEGIN{
216         case 'e':
217                 if (argc <= 1)
218                         quit("missing pattern");
219                 newfile(P_ARG, ARGF());
220                 fcomp();
221                 compfl = 1;
222                 continue;
223         case 'f':
224                 if(argc <= 1)
225                         quit("no pattern-file");
226                 newfile(P_FILE, ARGF());
227                 fcomp();
228                 compfl = 1;
229                 continue;
230         case 'g':
231                 gflag++;
232                 continue;
233         case 'n':
234                 nflag++;
235                 continue;
236         default:
237                 quit("Unknown flag: %c", ARGC());
238         } ARGEND
239
240         if(compfl == 0) {
241                 if (--argc < 0)
242                         quit("missing pattern");
243                 newfile(P_ARG, *argv++);
244                 fcomp();
245         }
246
247         if(depth)
248                 quit("Too many {'s");
249
250         ltab[0].address = rep;
251
252         dechain();
253
254         if(argc <= 0)
255                 enroll(nil);            /* Add stdin to cache */
256         else
257                 while(--argc >= 0)
258                         enroll(*argv++);
259         execute();
260         exits(nil);
261 }
262
263 void
264 fcomp(void)
265 {
266         int     i;
267         Label   *lpt;
268         Rune    *tp;
269         SedCom  *pt, *pt1;
270         static Rune     *p = addspace;
271         static SedCom   **cmpend[DEPTH];        /* stack of {} operations */
272
273         while (rline(linebuf, lbend) >= 0) {
274                 cp = linebuf;
275 comploop:
276                 while(*cp == L' ' || *cp == L'\t')
277                         cp++;
278                 if(*cp == L'\0' || *cp == L'#')
279                         continue;
280                 if(*cp == L';') {
281                         cp++;
282                         goto comploop;
283                 }
284
285                 address(&rep->ad1);
286                 if (rep->ad1.type != A_NONE) {
287                         if (rep->ad1.type == A_LAST) {
288                                 if (!lastre)
289                                         quit("First RE may not be null");
290                                 rep->ad1.type = A_RE;
291                                 rep->ad1.rp = lastre;
292                         }
293                         if(*cp == L',' || *cp == L';') {
294                                 cp++;
295                                 address(&rep->ad2);
296                                 if (rep->ad2.type == A_LAST) {
297                                         rep->ad2.type = A_RE;
298                                         rep->ad2.rp = lastre;
299                                 }
300                         } else
301                                 rep->ad2.type = A_NONE;
302                 }
303                 while(*cp == L' ' || *cp == L'\t')
304                         cp++;
305
306 swit:
307                 switch(*cp++) {
308                 default:
309                         quit("Unrecognized command: %S", linebuf);
310
311                 case '!':
312                         rep->negfl = 1;
313                         goto swit;
314
315                 case '{':
316                         rep->command = BCOM;
317                         rep->negfl = !rep->negfl;
318                         cmpend[depth++] = &rep->lb1;
319                         if(++rep >= pend)
320                                 quit("Too many commands: %S", linebuf);
321                         if(*cp == '\0')
322                                 continue;
323                         goto comploop;
324
325                 case '}':
326                         if(rep->ad1.type != A_NONE)
327                                 quit(AD0MES, linebuf);
328                         if(--depth < 0)
329                                 quit("Too many }'s");
330                         *cmpend[depth] = rep;
331                         if(*cp == 0)
332                                 continue;
333                         goto comploop;
334
335                 case '=':
336                         rep->command = EQCOM;
337                         if(rep->ad2.type != A_NONE)
338                                 quit(AD1MES, linebuf);
339                         break;
340
341                 case ':':
342                         if(rep->ad1.type != A_NONE)
343                                 quit(AD0MES, linebuf);
344
345                         while(*cp == L' ')
346                                 cp++;
347                         tp = lab->uninm;
348                         while (*cp && *cp != L';' && *cp != L' ' &&
349                             *cp != L'\t' && *cp != L'#') {
350                                 *tp++ = *cp++;
351                                 if(tp >= &lab->uninm[8])
352                                         quit(LTL, linebuf);
353                         }
354                         *tp = L'\0';
355
356                         if (*lab->uninm == L'\0')               /* no label? */
357                                 quit(CGMES, L":", linebuf);
358                         if(lpt = search(lab)) {
359                                 if(lpt->address)
360                                         quit("Duplicate labels: %S", linebuf);
361                         } else {
362                                 lab->chain = 0;
363                                 lpt = lab;
364                                 if(++lab >= labend)
365                                         quit("Too many labels: %S", linebuf);
366                         }
367                         lpt->address = rep;
368                         if (*cp == L'#')
369                                 continue;
370                         rep--;                  /* reuse this slot */
371                         break;
372
373                 case 'a':
374                         rep->command = ACOM;
375                         if(rep->ad2.type != A_NONE)
376                                 quit(AD1MES, linebuf);
377                         if(*cp == L'\\')
378                                 cp++;
379                         if(*cp++ != L'\n')
380                                 quit(CGMES, L"a", linebuf);
381                         rep->text = p;
382                         p = stext(p, addend);
383                         break;
384                 case 'c':
385                         rep->command = CCOM;
386                         if(*cp == L'\\')
387                                 cp++;
388                         if(*cp++ != L'\n')
389                                 quit(CGMES, L"c", linebuf);
390                         rep->text = p;
391                         p = stext(p, addend);
392                         break;
393                 case 'i':
394                         rep->command = ICOM;
395                         if(rep->ad2.type != A_NONE)
396                                 quit(AD1MES, linebuf);
397                         if(*cp == L'\\')
398                                 cp++;
399                         if(*cp++ != L'\n')
400                                 quit(CGMES, L"i", linebuf);
401                         rep->text = p;
402                         p = stext(p, addend);
403                         break;
404
405                 case 'g':
406                         rep->command = GCOM;
407                         break;
408
409                 case 'G':
410                         rep->command = CGCOM;
411                         break;
412
413                 case 'h':
414                         rep->command = HCOM;
415                         break;
416
417                 case 'H':
418                         rep->command = CHCOM;
419                         break;
420
421                 case 't':
422                         rep->command = TCOM;
423                         goto jtcommon;
424
425                 case 'b':
426                         rep->command = BCOM;
427 jtcommon:
428                         while(*cp == L' ')
429                                 cp++;
430                         if(*cp == L'\0' || *cp == L';') {
431                                 /* no label; jump to end */
432                                 if(pt = ltab[0].chain) {
433                                         while((pt1 = pt->lb1) != nil)
434                                                 pt = pt1;
435                                         pt->lb1 = rep;
436                                 } else
437                                         ltab[0].chain = rep;
438                                 break;
439                         }
440
441                         /* copy label into lab->uninm */
442                         tp = lab->uninm;
443                         while((*tp = *cp++) != L'\0' && *tp != L';')
444                                 if(++tp >= &lab->uninm[8])
445                                         quit(LTL, linebuf);
446                         cp--;
447                         *tp = L'\0';
448
449                         if (*lab->uninm == L'\0')
450                                 /* shouldn't get here */
451                                 quit(CGMES, L"b or t", linebuf);
452                         if((lpt = search(lab)) != nil) {
453                                 if(lpt->address)
454                                         rep->lb1 = lpt->address;
455                                 else {
456                                         for(pt = lpt->chain; pt != nil &&
457                                             (pt1 = pt->lb1) != nil; pt = pt1)
458                                                 ;
459                                         if (pt)
460                                                 pt->lb1 = rep;
461                                 }
462                         } else {                        /* add new label */
463                                 lab->chain = rep;
464                                 lab->address = 0;
465                                 if(++lab >= labend)
466                                         quit("Too many labels: %S", linebuf);
467                         }
468                         break;
469
470                 case 'n':
471                         rep->command = NCOM;
472                         break;
473
474                 case 'N':
475                         rep->command = CNCOM;
476                         break;
477
478                 case 'p':
479                         rep->command = PCOM;
480                         break;
481
482                 case 'P':
483                         rep->command = CPCOM;
484                         break;
485
486                 case 'r':
487                         rep->command = RCOM;
488                         if(rep->ad2.type != A_NONE)
489                                 quit(AD1MES, linebuf);
490                         if(*cp++ != L' ')
491                                 quit(CGMES, L"r", linebuf);
492                         rep->text = p;
493                         p = stext(p, addend);
494                         break;
495
496                 case 'd':
497                         rep->command = DCOM;
498                         break;
499
500                 case 'D':
501                         rep->command = CDCOM;
502                         rep->lb1 = pspace;
503                         break;
504
505                 case 'q':
506                         rep->command = QCOM;
507                         if(rep->ad2.type != A_NONE)
508                                 quit(AD1MES, linebuf);
509                         break;
510
511                 case 'l':
512                         rep->command = LCOM;
513                         break;
514
515                 case 's':
516                         rep->command = SCOM;
517                         seof = *cp++;
518                         if ((rep->re1 = compile()) == 0) {
519                                 if(!lastre)
520                                         quit("First RE may not be null");
521                                 rep->re1 = lastre;
522                         }
523                         rep->rhs = p;
524                         if((p = compsub(p, addend)) == 0)
525                                 quit(CGMES, L"s", linebuf);
526                         if(*cp == L'g') {
527                                 cp++;
528                                 rep->gfl++;
529                         } else if(gflag)
530                                 rep->gfl++;
531
532                         if(*cp == L'p') {
533                                 cp++;
534                                 rep->pfl = 1;
535                         }
536
537                         if(*cp == L'P') {
538                                 cp++;
539                                 rep->pfl = 2;
540                         }
541
542                         if(*cp == L'w') {
543                                 cp++;
544                                 if(*cp++ !=  L' ')
545                                         quit(CGMES, L"s", linebuf);
546                                 text(fname[nfiles]);
547                                 for(i = nfiles - 1; i >= 0; i--)
548                                         if(cmp(fname[nfiles], fname[i]) == 0) {
549                                                 rep->fcode = fcode[i];
550                                                 goto done;
551                                         }
552                                 if(nfiles >= MAXFILES)
553                                         quit("Too many files in w commands 1");
554                                 rep->fcode = open_file(fname[nfiles]);
555                         }
556                         break;
557
558                 case 'w':
559                         rep->command = WCOM;
560                         if(*cp++ != L' ')
561                                 quit(CGMES, L"w", linebuf);
562                         text(fname[nfiles]);
563                         for(i = nfiles - 1; i >= 0; i--)
564                                 if(cmp(fname[nfiles], fname[i]) == 0) {
565                                         rep->fcode = fcode[i];
566                                         goto done;
567                                 }
568                         if(nfiles >= MAXFILES){
569                                 fprint(2, "sed: Too many files in w commands 2 \n");
570                                 fprint(2, "nfiles = %d; MAXF = %d\n",
571                                         nfiles, MAXFILES);
572                                 errexit();
573                         }
574                         rep->fcode = open_file(fname[nfiles]);
575                         break;
576
577                 case 'x':
578                         rep->command = XCOM;
579                         break;
580
581                 case 'y':
582                         rep->command = YCOM;
583                         seof = *cp++;
584                         if (ycomp(rep) == 0)
585                                 quit(CGMES, L"y", linebuf);
586                         break;
587
588                 }
589 done:
590                 if(++rep >= pend)
591                         quit("Too many commands, last: %S", linebuf);
592                 if(*cp++ != L'\0') {
593                         if(cp[-1] == L';')
594                                 goto comploop;
595                         quit(CGMES, cp - 1, linebuf);
596                 }
597         }
598 }
599
600 Biobuf *
601 open_file(char *name)
602 {
603         int fd;
604         Biobuf *bp;
605
606         if ((bp = malloc(sizeof(Biobuf))) == 0)
607                 quit("Out of memory");
608         if ((fd = open(name, OWRITE)) < 0 &&
609             (fd = create(name, OWRITE, 0666)) < 0)
610                 quit("Cannot create %s", name);
611         Binit(bp, fd, OWRITE);
612         Blethal(bp, nil);
613         Bseek(bp, 0, 2);
614         fcode[nfiles++] = bp;
615         return bp;
616 }
617
618 Rune *
619 compsub(Rune *rhs, Rune *end)
620 {
621         Rune r;
622
623         while ((r = *cp++) != '\0') {
624                 if(r == '\\') {
625                         if (rhs < end)
626                                 *rhs++ = Runemax;
627                         else
628                                 return 0;
629                         r = *cp++;
630                         if(r == 'n')
631                                 r = '\n';
632                 } else {
633                         if(r == seof) {
634                                 if (rhs < end)
635                                         *rhs++ = '\0';
636                                 else
637                                         return 0;
638                                 return rhs;
639                         }
640                 }
641                 if (rhs < end)
642                         *rhs++ = r;
643                 else
644                         return 0;
645         }
646         return 0;
647 }
648
649 Reprog *
650 compile(void)
651 {
652         Rune c;
653         char *ep;
654         char expbuf[512];
655
656         if((c = *cp++) == seof)         /* L'//' */
657                 return 0;
658         ep = expbuf;
659         do {
660                 if (c == L'\0' || c == L'\n')
661                         quit(TMMES, linebuf);
662                 if (c == L'\\') {
663                         if (ep >= expbuf+sizeof(expbuf))
664                                 quit(TMMES, linebuf);
665                         ep += runetochar(ep, &c);
666                         if ((c = *cp++) == L'n')
667                                 c = L'\n';
668                 }
669                 if (ep >= expbuf + sizeof(expbuf))
670                         quit(TMMES, linebuf);
671                 ep += runetochar(ep, &c);
672         } while ((c = *cp++) != seof);
673         *ep = 0;
674         return lastre = regcomp(expbuf);
675 }
676
677 void
678 regerror(char *s)
679 {
680         USED(s);
681         quit(CGMES, L"r.e.-using", linebuf);
682 }
683
684 void
685 newfile(enum PTYPE type, char *name)
686 {
687         if (type == P_ARG)
688                 prog.curr = name;
689         else {
690                 if ((prog.bp = Bopen(name, OREAD)) == 0)
691                         quit("Cannot open pattern-file: %s\n", name);
692                 Blethal(prog.bp, nil);
693         }
694         prog.type = type;
695 }
696
697 int
698 rline(Rune *buf, Rune *end)
699 {
700         long c;
701         Rune r;
702
703         while ((c = getrune()) >= 0) {
704                 r = c;
705                 if (r == '\\') {
706                         if (buf <= end)
707                                 *buf++ = r;
708                         if ((c = getrune()) < 0)
709                                 break;
710                         r = c;
711                 } else if (r == '\n') {
712                         *buf = '\0';
713                         return 1;
714                 }
715                 if (buf <= end)
716                         *buf++ = r;
717         }
718         *buf = '\0';
719         return -1;
720 }
721
722 long
723 getrune(void)
724 {
725         long c;
726         Rune r;
727         char *p;
728
729         if (prog.type == P_ARG) {
730                 if ((p = prog.curr) != 0) {
731                         if (*p) {
732                                 prog.curr += chartorune(&r, p);
733                                 c = r;
734                         } else {
735                                 c = '\n';       /* fake an end-of-line */
736                                 prog.curr = 0;
737                         }
738                 } else
739                         c = -1;
740         } else if ((c = Bgetrune(prog.bp)) < 0)
741                 Bterm(prog.bp);
742         return c;
743 }
744
745 void
746 address(Addr *ap)
747 {
748         int c;
749         long lno;
750
751         if((c = *cp++) == '$')
752                 ap->type = A_DOL;
753         else if(c == '/') {
754                 seof = c;
755                 if (ap->rp = compile())
756                         ap->type = A_RE;
757                 else
758                         ap->type = A_LAST;
759         }
760         else if (c >= '0' && c <= '9') {
761                 lno = c - '0';
762                 while ((c = *cp) >= '0' && c <= '9')
763                         lno = lno*10 + *cp++ - '0';
764                 if(!lno)
765                         quit("line number 0 is illegal",0);
766                 ap->type = A_LINE;
767                 ap->line = lno;
768         }
769         else {
770                 cp--;
771                 ap->type = A_NONE;
772         }
773 }
774
775 cmp(char *a, char *b)           /* compare characters */
776 {
777         while(*a == *b++)
778                 if (*a == '\0')
779                         return 0;
780                 else
781                         a++;
782         return 1;
783 }
784 rcmp(Rune *a, Rune *b)          /* compare runes */
785 {
786         while(*a == *b++)
787                 if (*a == '\0')
788                         return 0;
789                 else
790                         a++;
791         return 1;
792 }
793
794 char *
795 text(char *p)           /* extract character string */
796 {
797         Rune r;
798
799         while(*cp == ' ' || *cp == '\t')
800                 cp++;
801         while (*cp) {
802                 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
803                         break;
804                 if (r == '\n')
805                         while (*cp == ' ' || *cp == '\t')
806                                 cp++;
807                 p += runetochar(p, &r);
808         }
809         *p++ = '\0';
810         return p;
811 }
812
813 Rune *
814 stext(Rune *p, Rune *end)               /* extract rune string */
815 {
816         while(*cp == L' ' || *cp == L'\t')
817                 cp++;
818         while (*cp) {
819                 if (*cp == L'\\' && *++cp == L'\0')
820                         break;
821                 if (p >= end-1)
822                         quit(TMMES, linebuf);
823                 if ((*p++ = *cp++) == L'\n')
824                         while(*cp == L' ' || *cp == L'\t')
825                                 cp++;
826         }
827         *p++ = 0;
828         return p;
829 }
830
831
832 Label *
833 search(Label *ptr)
834 {
835         Label   *rp;
836
837         for (rp = ltab; rp < ptr; rp++)
838                 if(rcmp(rp->uninm, ptr->uninm) == 0)
839                         return(rp);
840         return(0);
841 }
842
843 void
844 dechain(void)
845 {
846         Label   *lptr;
847         SedCom  *rptr, *trptr;
848
849         for(lptr = ltab; lptr < lab; lptr++) {
850                 if(lptr->address == 0)
851                         quit("Undefined label: %S", lptr->uninm);
852                 if(lptr->chain) {
853                         rptr = lptr->chain;
854                         while((trptr = rptr->lb1) != nil) {
855                                 rptr->lb1 = lptr->address;
856                                 rptr = trptr;
857                         }
858                         rptr->lb1 = lptr->address;
859                 }
860         }
861 }
862
863 int
864 ycomp(SedCom *r)
865 {
866         int i;
867         Rune *rp, *sp, *tsp;
868         Rune c, highc;
869
870         highc = 0;
871         for(tsp = cp; *tsp != seof; tsp++) {
872                 if(*tsp == L'\\')
873                         tsp++;
874                 if(*tsp == L'\n' || *tsp == L'\0')
875                         return 0;
876                 if (*tsp > highc)
877                         highc = *tsp;
878         }
879         tsp++;
880         if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
881                 quit("Out of memory");
882         *rp++ = highc;                          /* save upper bound */
883         for (i = 0; i <= highc; i++)
884                 rp[i] = i;
885         sp = cp;
886         while((c = *sp++) != seof) {
887                 if(c == L'\\' && *sp == L'n') {
888                         sp++;
889                         c = L'\n';
890                 }
891                 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
892                         rp[c] = L'\n';
893                         tsp++;
894                 }
895                 if(rp[c] == seof || rp[c] == L'\0') {
896                         free(r->re1);
897                         r->re1 = nil;
898                         return 0;
899                 }
900         }
901         if(*tsp != seof) {
902                 free(r->re1);
903                 r->re1 = nil;
904                 return 0;
905         }
906         cp = tsp+1;
907         return 1;
908 }
909
910 void
911 execute(void)
912 {
913         SedCom  *ipc;
914
915         while (spend = gline(linebuf)){
916                 for(ipc = pspace; ipc->command; ) {
917                         if (!executable(ipc)) {
918                                 ipc++;
919                                 continue;
920                         }
921                         command(ipc);
922
923                         if(delflag)
924                                 break;
925                         if(jflag) {
926                                 jflag = 0;
927                                 if((ipc = ipc->lb1) == 0)
928                                         break;
929                         } else
930                                 ipc++;
931                 }
932                 if(!nflag && !delflag)
933                         putline(&fout, linebuf, spend - linebuf);
934                 if(aptr > abuf)
935                         arout();
936                 delflag = 0;
937         }
938 }
939
940 /* determine if a statement should be applied to an input line */
941 int
942 executable(SedCom *ipc)
943 {
944         if (ipc->active) {      /* Addr1 satisfied - accept until Addr2 */
945                 if (ipc->active == 1)           /* Second line */
946                         ipc->active = 2;
947                 switch(ipc->ad2.type) {
948                 case A_NONE:            /* No second addr; use first */
949                         ipc->active = 0;
950                         break;
951                 case A_DOL:             /* Accept everything */
952                         return !ipc->negfl;
953                 case A_LINE:            /* Line at end of range? */
954                         if (lnum <= ipc->ad2.line) {
955                                 if (ipc->ad2.line == lnum)
956                                         ipc->active = 0;
957                                 return !ipc->negfl;
958                         }
959                         ipc->active = 0;        /* out of range */
960                         return ipc->negfl;
961                 case A_RE:              /* Check for matching R.E. */
962                         if (match(ipc->ad2.rp, linebuf))
963                                 ipc->active = 0;
964                         return !ipc->negfl;
965                 default:
966                         quit("Internal error");
967                 }
968         }
969         switch (ipc->ad1.type) {        /* Check first address */
970         case A_NONE:                    /* Everything matches */
971                 return !ipc->negfl;
972         case A_DOL:                     /* Only last line */
973                 if (dolflag)
974                         return !ipc->negfl;
975                 break;
976         case A_LINE:                    /* Check line number */
977                 if (ipc->ad1.line == lnum) {
978                         ipc->active = 1;        /* In range */
979                         return !ipc->negfl;
980                 }
981                 break;
982         case A_RE:                      /* Check R.E. */
983                 if (match(ipc->ad1.rp, linebuf)) {
984                         ipc->active = 1;        /* In range */
985                         return !ipc->negfl;
986                 }
987                 break;
988         default:
989                 quit("Internal error");
990         }
991         return ipc->negfl;
992 }
993
994 int
995 match(Reprog *pattern, Rune *buf)
996 {
997         if (!pattern)
998                 return 0;
999         subexp[0].rsp = buf;
1000         subexp[0].ep = 0;
1001         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
1002                 loc1 = subexp[0].rsp;
1003                 loc2 = subexp[0].rep;
1004                 return 1;
1005         }
1006         loc1 = loc2 = 0;
1007         return 0;
1008 }
1009
1010 int
1011 substitute(SedCom *ipc)
1012 {
1013         int len;
1014
1015         if(!match(ipc->re1, linebuf))
1016                 return 0;
1017
1018         /*
1019          * we have at least one match.  some patterns, e.g. '$' or '^', can
1020          * produce 0-length matches, so during a global substitute we must
1021          * bump to the character after a 0-length match to keep from looping.
1022          */
1023         sflag = 1;
1024         if(ipc->gfl == 0)                       /* single substitution */
1025                 dosub(ipc->rhs);
1026         else
1027                 do{                             /* global substitution */
1028                         len = loc2 - loc1;      /* length of match */
1029                         dosub(ipc->rhs);        /* dosub moves loc2 */
1030                         if(*loc2 == 0)          /* end of string */
1031                                 break;
1032                         if(len == 0)            /* zero-length R.E. match */
1033                                 loc2++;         /* bump over 0-length match */
1034                         if(*loc2 == 0)          /* end of string */
1035                                 break;
1036                 } while(match(ipc->re1, loc2));
1037         return 1;
1038 }
1039
1040 void
1041 dosub(Rune *rhsbuf)
1042 {
1043         int c, n;
1044         Rune *lp, *sp, *rp;
1045
1046         lp = linebuf;
1047         sp = genbuf;
1048         rp = rhsbuf;
1049         while (lp < loc1)
1050                 *sp++ = *lp++;
1051         while(c = *rp++) {
1052                 if (c == '&') {
1053                         sp = place(sp, loc1, loc2);
1054                         continue;
1055                 }
1056                 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1057                         n = c-'0';
1058                         if (subexp[n].rsp && subexp[n].rep) {
1059                                 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1060                                 continue;
1061                         }
1062                         else {
1063                                 quit("Invalid back reference \\%d", n);
1064                         }
1065                 }
1066                 *sp++ = c;
1067                 if (sp >= &genbuf[LBSIZE])
1068                         quit("Output line too long");
1069         }
1070         lp = loc2;
1071         loc2 = sp - genbuf + linebuf;
1072         while (*sp++ = *lp++)
1073                 if (sp >= &genbuf[LBSIZE])
1074                         quit("Output line too long");
1075         lp = linebuf;
1076         sp = genbuf;
1077         while (*lp++ = *sp++)
1078                 ;
1079         spend = lp - 1;
1080 }
1081
1082 Rune *
1083 place(Rune *sp, Rune *l1, Rune *l2)
1084 {
1085         while (l1 < l2) {
1086                 *sp++ = *l1++;
1087                 if (sp >= &genbuf[LBSIZE])
1088                         quit("Output line too long");
1089         }
1090         return sp;
1091 }
1092
1093 char *
1094 trans(int c)
1095 {
1096         static char buf[] = "\\x0000";
1097         static char hex[] = "0123456789abcdef";
1098
1099         switch(c) {
1100         case '\b':
1101                 return "\\b";
1102         case '\n':
1103                 return "\\n";
1104         case '\r':
1105                 return "\\r";
1106         case '\t':
1107                 return "\\t";
1108         case '\\':
1109                 return "\\\\";
1110         }
1111         buf[2] = hex[(c>>12)&0xF];
1112         buf[3] = hex[(c>>8)&0xF];
1113         buf[4] = hex[(c>>4)&0xF];
1114         buf[5] = hex[c&0xF];
1115         return buf;
1116 }
1117
1118 void
1119 command(SedCom *ipc)
1120 {
1121         int i, c;
1122         char *ucp;
1123         Rune *execp, *p1, *p2, *rp;
1124
1125         switch(ipc->command) {
1126         case ACOM:
1127                 *aptr++ = ipc;
1128                 if(aptr >= abuf+MAXADDS)
1129                         quit("Too many appends after line %ld", lnum);
1130                 *aptr = 0;
1131                 break;
1132         case CCOM:
1133                 delflag = 1;
1134                 if(ipc->active == 1) {
1135                         for(rp = ipc->text; *rp; rp++)
1136                                 Bputrune(&fout, *rp);
1137                         Bputc(&fout, '\n');
1138                 }
1139                 break;
1140         case DCOM:
1141                 delflag++;
1142                 break;
1143         case CDCOM:
1144                 p1 = p2 = linebuf;
1145                 while(*p1 != '\n') {
1146                         if(*p1++ == 0) {
1147                                 delflag++;
1148                                 return;
1149                         }
1150                 }
1151                 p1++;
1152                 while(*p2++ = *p1++)
1153                         ;
1154                 spend = p2 - 1;
1155                 jflag++;
1156                 break;
1157         case EQCOM:
1158                 Bprint(&fout, "%ld\n", lnum);
1159                 break;
1160         case GCOM:
1161                 p1 = linebuf;
1162                 p2 = holdsp;
1163                 while(*p1++ = *p2++)
1164                         ;
1165                 spend = p1 - 1;
1166                 break;
1167         case CGCOM:
1168                 *spend++ = '\n';
1169                 p1 = spend;
1170                 p2 = holdsp;
1171                 while(*p1++ = *p2++)
1172                         if(p1 >= lbend)
1173                                 break;
1174                 spend = p1 - 1;
1175                 break;
1176         case HCOM:
1177                 p1 = holdsp;
1178                 p2 = linebuf;
1179                 while(*p1++ = *p2++);
1180                 hspend = p1 - 1;
1181                 break;
1182         case CHCOM:
1183                 *hspend++ = '\n';
1184                 p1 = hspend;
1185                 p2 = linebuf;
1186                 while(*p1++ = *p2++)
1187                         if(p1 >= hend)
1188                                 break;
1189                 hspend = p1 - 1;
1190                 break;
1191         case ICOM:
1192                 for(rp = ipc->text; *rp; rp++)
1193                         Bputrune(&fout, *rp);
1194                 Bputc(&fout, '\n');
1195                 break;
1196         case BCOM:
1197                 jflag = 1;
1198                 break;
1199         case LCOM:
1200                 c = 0;
1201                 for (i = 0, rp = linebuf; *rp; rp++) {
1202                         c = *rp;
1203                         if(c >= 0x20 && c < 0x7F && c != '\\') {
1204                                 Bputc(&fout, c);
1205                                 if(i++ > 71) {
1206                                         Bprint(&fout, "\\\n");
1207                                         i = 0;
1208                                 }
1209                         } else {
1210                                 for (ucp = trans(*rp); *ucp; ucp++){
1211                                         c = *ucp;
1212                                         Bputc(&fout, c);
1213                                         if(i++ > 71) {
1214                                                 Bprint(&fout, "\\\n");
1215                                                 i = 0;
1216                                         }
1217                                 }
1218                         }
1219                 }
1220                 if(c == ' ')
1221                         Bprint(&fout, "\\n");
1222                 Bputc(&fout, '\n');
1223                 break;
1224         case NCOM:
1225                 if(!nflag)
1226                         putline(&fout, linebuf, spend-linebuf);
1227
1228                 if(aptr > abuf)
1229                         arout();
1230                 if((execp = gline(linebuf)) == 0) {
1231                         delflag = 1;
1232                         break;
1233                 }
1234                 spend = execp;
1235                 break;
1236         case CNCOM:
1237                 if(aptr > abuf)
1238                         arout();
1239                 *spend++ = '\n';
1240                 if((execp = gline(spend)) == 0) {
1241                         delflag = 1;
1242                         break;
1243                 }
1244                 spend = execp;
1245                 break;
1246         case PCOM:
1247                 putline(&fout, linebuf, spend-linebuf);
1248                 break;
1249         case CPCOM:
1250 cpcom:
1251                 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1252                         Bputc(&fout, *rp);
1253                 Bputc(&fout, '\n');
1254                 break;
1255         case QCOM:
1256                 if(!nflag)
1257                         putline(&fout, linebuf, spend-linebuf);
1258                 if(aptr > abuf)
1259                         arout();
1260                 exits(nil);
1261         case RCOM:
1262                 *aptr++ = ipc;
1263                 if(aptr >= &abuf[MAXADDS])
1264                         quit("Too many reads after line %ld", lnum);
1265                 *aptr = 0;
1266                 break;
1267         case SCOM:
1268                 i = substitute(ipc);
1269                 if(i && ipc->pfl)
1270                         if(ipc->pfl == 1)
1271                                 putline(&fout, linebuf, spend-linebuf);
1272                         else
1273                                 goto cpcom;
1274                 if(i && ipc->fcode)
1275                         goto wcom;
1276                 break;
1277
1278         case TCOM:
1279                 if(sflag) {
1280                         sflag = 0;
1281                         jflag = 1;
1282                 }
1283                 break;
1284
1285         case WCOM:
1286 wcom:
1287                 putline(ipc->fcode,linebuf, spend - linebuf);
1288                 break;
1289         case XCOM:
1290                 p1 = linebuf;
1291                 p2 = genbuf;
1292                 while(*p2++ = *p1++)
1293                         ;
1294                 p1 = holdsp;
1295                 p2 = linebuf;
1296                 while(*p2++ = *p1++)
1297                         ;
1298                 spend = p2 - 1;
1299                 p1 = genbuf;
1300                 p2 = holdsp;
1301                 while(*p2++ = *p1++)
1302                         ;
1303                 hspend = p2 - 1;
1304                 break;
1305         case YCOM:
1306                 p1 = linebuf;
1307                 p2 = ipc->text;
1308                 for (i = *p2++; *p1; p1++)
1309                         if (*p1 <= i)
1310                                 *p1 = p2[*p1];
1311                 break;
1312         }
1313 }
1314
1315 void
1316 putline(Biobuf *bp, Rune *buf, int n)
1317 {
1318         while (n--)
1319                 Bputrune(bp, *buf++);
1320         Bputc(bp, '\n');
1321 }
1322
1323 void
1324 arout(void)
1325 {
1326         int     c;
1327         char    *s, *e;
1328         char    buf[128];
1329         Rune    *p1;
1330         Biobuf  *fi;
1331
1332         for (aptr = abuf; *aptr; aptr++) {
1333                 if((*aptr)->command == ACOM) {
1334                         for(p1 = (*aptr)->text; *p1; p1++ )
1335                                 Bputrune(&fout, *p1);
1336                         Bputc(&fout, '\n');
1337                 } else {
1338                         for(s = buf, e = buf+sizeof(buf)-UTFmax-1, p1 = (*aptr)->text; *p1 && s < e; p1++)
1339                                 s += runetochar(s, p1);
1340                         *s = '\0';
1341                         if((fi = Bopen(buf, OREAD)) == 0)
1342                                 continue;
1343                         Blethal(fi, nil);
1344                         while((c = Bgetc(fi)) >= 0)
1345                                 Bputc(&fout, c);
1346                         Bterm(fi);
1347                 }
1348         }
1349         aptr = abuf;
1350         *aptr = 0;
1351 }
1352
1353 void
1354 errexit(void)
1355 {
1356         exits("error");
1357 }
1358
1359 void
1360 quit(char *fmt, ...)
1361 {
1362         char *p, *ep;
1363         char msg[256];
1364         va_list arg;
1365
1366         ep = msg + sizeof msg;
1367         p = seprint(msg, ep, "sed: ");
1368         va_start(arg, fmt);
1369         p = vseprint(p, ep, fmt, arg);
1370         va_end(arg);
1371         p = seprint(p, ep, "\n");
1372         write(2, msg, p - msg);
1373         errexit();
1374 }
1375
1376 Rune *
1377 gline(Rune *addr)
1378 {
1379         long c;
1380         Rune *p;
1381         static long peekc = 0;
1382
1383         if (f == 0 && opendata() < 0)
1384                 return 0;
1385         sflag = 0;
1386         lnum++;
1387 /*      Bflush(&fout);********* dumped 4/30/92 - bobf****/
1388         do {
1389                 p = addr;
1390                 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1391                         if (c == '\n') {
1392                                 if ((peekc = Bgetrune(f)) < 0 && fhead == nil)
1393                                         dolflag = 1;
1394                                 *p = '\0';
1395                                 return p;
1396                         }
1397                         if (c && p < lbend)
1398                                 *p++ = c;
1399                 }
1400                 /* return partial final line, adding implicit newline */
1401                 if(p != addr) {
1402                         *p = '\0';
1403                         peekc = -1;
1404                         if (fhead == nil)
1405                                 dolflag = 1;
1406                         return p;
1407                 }
1408                 peekc = 0;
1409                 Bterm(f);
1410         } while (opendata() > 0);               /* Switch to next stream */
1411         f = 0;
1412         return 0;
1413 }
1414
1415 /*
1416  * Data file input section - the intent is to transparently
1417  *      catenate all data input streams.
1418  */
1419 void
1420 enroll(char *filename)          /* Add a file to the input file cache */
1421 {
1422         FileCache *fp;
1423
1424         if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1425                 quit("Out of memory");
1426         if (ftail == nil)
1427                 fhead = fp;
1428         else
1429                 ftail->next = fp;
1430         ftail = fp;
1431         fp->next = nil;
1432         fp->name = filename;            /* 0 => stdin */
1433 }
1434
1435 int
1436 opendata(void)
1437 {
1438         if (fhead == nil)
1439                 return -1;
1440         if (fhead->name) {
1441                 if ((f = Bopen(fhead->name, OREAD)) == nil)
1442                         quit("Can't open %s", fhead->name);
1443         } else {
1444                 Binit(&stdin, 0, OREAD);
1445                 f = &stdin;
1446         }
1447         Blethal(f, nil);
1448         fhead = fhead->next;
1449         return 1;
1450 }