]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/sed.c
ptrap: implement filtering on plumb attributes
[plan9front.git] / sys / src / cmd / sed.c
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
/*
 * Fixed capacities of the statically allocated tables and buffers
 * declared below.
 */
enum {
	DEPTH		= 20,		/* max nesting depth of {} */
	MAXCMDS		= 512,		/* max sed commands */
	ADDSIZE		= 10000,	/* size of add & read buffer */
	MAXADDS		= 20,		/* max pending adds and reads */
	LBSIZE		= 8192,		/* input line size */
	LABSIZE		= 50,		/* max number of labels */
	MAXSUB		= 10,		/* max number of sub reg exp */
	MAXFILES	= 120,		/* max output files */
};
19
/*
 * An address is a line #, a R.E., "$", a reference to the last
 * R.E., or nothing.
 *
 * A_LAST is what address() produces for an empty pattern (//);
 * fcomp() rewrites it to A_RE using lastre.  The union member in
 * use is selected by type: line for A_LINE, rp for A_RE.
 */
typedef struct {
	enum {
		A_NONE,
		A_DOL,
		A_LINE,
		A_RE,
		A_LAST,
	}type;
	union {
		long	line;		/* Line # */
		Reprog	*rp;		/* Compiled R.E. */
	};
} Addr;
37
/*
 * One compiled sed command.  The anonymous union is overlaid: which
 * member is meaningful depends on the command (re1 for s, text for
 * a/c/i/r and the y translation table, lb1 for branches and {).
 */
typedef struct	SEDCOM {
	Addr	ad1;			/* optional start address */
	Addr	ad2;			/* optional end address */
	union {
		Reprog	*re1;		/* compiled R.E. */
		Rune	*text;		/* added text or file name */
		struct	SEDCOM	*lb1;	/* destination command of branch */
	};
	Rune	*rhs;			/* Right-hand side of substitution */
	Biobuf*	fcode;			/* File ID for read and write */
	char	command;		/* command code -see below */
	char	gfl;			/* 'Global' flag for substitutions */
	char	pfl;			/* 'print' flag for substitutions: 1 for p, 2 for P */
	char	active;			/* 1 => data between start and end */
	char	negfl;			/* negation flag; set by '!', inverted by '{' */
} SedCom;
54
/*
 * Command Codes for field SedCom.command.
 * Values are octal and all non-zero; a zero command field marks an
 * unused slot in pspace.
 */
#define	ACOM	01
#define	BCOM	020
#define	CCOM	02
#define	CDCOM	025
#define	CNCOM	022
#define	COCOM	017
#define	CPCOM	023
#define	DCOM	03
#define	ECOM	015
#define	EQCOM	013
#define	FCOM	016
#define	GCOM	027
#define	CGCOM	030
#define	HCOM	031
#define	CHCOM	032
#define	ICOM	04
#define	LCOM	05
#define	NCOM	012
#define	PCOM	010
#define	QCOM	011
#define	RCOM	06
#define	SCOM	07
#define	TCOM	021
#define	WCOM	014
#define	CWCOM	024
#define	YCOM	026
#define	XCOM	033
83
/*
 * A label is either defined (address set) or only referenced so far.
 * While it is undefined, the branch commands that refer to it are
 * linked through their lb1 fields starting at chain; dechain()
 * replaces those links with the final address.
 */
typedef struct label {			/* Label symbol table */
	Rune	uninm[9];		/* Label name */
	SedCom	*chain;
	SedCom	*address;		/* Command associated with label */
} Label;
89
/*
 * Singly linked list node naming one input file; the list is anchored
 * by fhead and ftail.
 */
typedef struct	FILE_CACHE {		/* Data file control block */
	struct FILE_CACHE *next;	/* Forward Link */
	char	*name;			/* Name of file */
} FileCache;
94
/* Compiled script: fcomp() fills pspace from the front, advancing rep. */
SedCom pspace[MAXCMDS];			/* Command storage */
SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
SedCom *rep = pspace;			/* Current fill point */

int	dollars;			/* Number of commands whose first address is $ */

Reprog	*lastre;			/* Last regular expression */
Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/

/* Text for a, c, i and r commands and right-hand sides of s commands. */
Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
Rune	*addend = addspace+ADDSIZE;

SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
SedCom	**aptr = abuf;
109
/*
 * Source of the script currently being compiled.  newfile() sets it
 * up and getrune() reads from it: bp is used when type is P_FILE,
 * curr when type is P_ARG.
 */
struct {				/* Sed program input control block */
	enum PTYPE {			/* Either on command line or in file */
		P_ARG,
		P_FILE,
	} type;
	union PCTL {			/* Pointer to data */
		Biobuf	*bp;
		char	*curr;
	};
} prog;
120
Rune	genbuf[LBSIZE+1];		/* Miscellaneous buffer */

FileCache	*fhead;			/* Head of File Cache Chain */
FileCache	*ftail;			/* Tail of File Cache Chain */

Rune	*loc1;				/* Start of pattern match */
Rune	*loc2;				/* End of pattern match */
Rune	seof;				/* Pattern delimiter char */

/*
 * linebuf also holds each script line while fcomp() compiles it.
 * It has LBSIZE+1 elements, so lbend points at the last valid one.
 */
Rune	linebuf[LBSIZE+1];		/* Input data buffer */
Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
Rune	*spend = linebuf;		/* End of input data */
Rune	*cp;				/* Current scan point in linebuf */

Rune	holdsp[LBSIZE+1];		/* Hold buffer */
Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
Rune	*hspend = holdsp;		/* End of hold data */

int	nflag;				/* Command line flags */
int	gflag;				/* -g: every s command is treated as global */
int	uflag;				/* -u: flush all outputs before reading the script file */

int	dolflag;			/* Set when at true EOF */
int	sflag;				/* Set when substitution done */
int	jflag;				/* Set when jump required */
int	delflag;			/* Delete current line when set */

long	lnum;				/* Input line count */

/*
 * Output files of w commands.  fname[i] names the stream fcode[i];
 * main() puts the standard output stream fout in slot 0.
 */
char	fname[MAXFILES][40];		/* File name cache */
Biobuf	*fcode[MAXFILES];		/* File ID cache */
int	nfiles;				/* Cache fill point */

Biobuf	fout;				/* Output stream */
Biobuf	stdin;				/* Default input */
Biobuf*	f;				/* Input data */

/*
 * ltab[0] is reserved for the implicit end-of-script label that
 * branches without a label jump to, so user labels start at ltab+1.
 */
Label	ltab[LABSIZE];			/* Label name symbol table */
Label	*labend = ltab+LABSIZE;		/* End of label table */
Label	*lab = ltab+1;			/* Current Fill point */

int	depth;				/* {} stack pointer */

Rune	bad;				/* Dummy err ptr reference */
Rune	*badp = &bad;
166
167
/*
 * Diagnostic formats passed to quit().  %S takes a Rune string;
 * CGMES takes the command name followed by the offending line.
 */
char	CGMES[]	 = 	"%S command garbled: %S";
char	TMMES[]	 = 	"Too much text: %S";
char	LTL[]	 = 	"Label too long: %S";
char	AD0MES[] =	"No addresses allowed: %S";
char	AD1MES[] =	"Only one address allowed: %S";
173
/* Forward declarations for the functions defined in this file. */
void	address(Addr *);
void	arout(void);
int	cmp(char *, char *);
int	rcmp(Rune *, Rune *);
void	command(SedCom *);
Reprog	*compile(void);
Rune	*compsub(Rune *, Rune *);
void	dechain(void);
void	dosub(Rune *);
void	enroll(char *);
void	errexit(void);
int	executable(SedCom *);
void	execute(void);
void	fcomp(void);
long	getrune(void);
Rune	*gline(Rune *);
int	match(Reprog *, Rune *);
void	newfile(enum PTYPE, char *);
int 	opendata(void);
Biobuf	*open_file(char *);
Rune	*place(Rune *, Rune *, Rune *);
void	quit(char *, ...);
int	rline(Rune *, Rune *);
Label	*search(Label *);
int	substitute(SedCom *);
char	*text(char *);
Rune	*stext(Rune *, Rune *);
int	ycomp(SedCom *);
char *	trans(int c);
void	putline(Biobuf *bp, Rune *buf, int n);
204
205 void
206 main(int argc, char **argv)
207 {
208         int compfl;
209
210         lnum = 0;
211         Binit(&fout, 1, OWRITE);
212         Blethal(&fout, nil);
213         fcode[nfiles++] = &fout;
214         compfl = 0;
215
216         if(argc == 1)
217                 exits(nil);
218         ARGBEGIN{
219         case 'e':
220                 if (argc <= 1)
221                         quit("missing pattern");
222                 newfile(P_ARG, ARGF());
223                 fcomp();
224                 compfl = 1;
225                 continue;
226         case 'f':
227                 if(argc <= 1)
228                         quit("no pattern-file");
229                 newfile(P_FILE, ARGF());
230                 fcomp();
231                 compfl = 1;
232                 continue;
233         case 'g':
234                 gflag++;
235                 continue;
236         case 'n':
237                 nflag++;
238                 continue;
239         case 'u':
240                 uflag++;
241                 continue;
242         case 'E': case 'r':     /* unix compat */
243                 continue;
244         default:
245                 quit("Unknown flag: %c", ARGC());
246         } ARGEND
247
248         if(compfl == 0) {
249                 if (--argc < 0)
250                         quit("missing pattern");
251                 newfile(P_ARG, *argv++);
252                 fcomp();
253         }
254
255         if(depth)
256                 quit("Too many {'s");
257
258         ltab[0].address = rep;
259
260         dechain();
261
262         if(argc <= 0)
263                 enroll(nil);            /* Add stdin to cache */
264         else
265                 while(--argc >= 0)
266                         enroll(*argv++);
267         execute();
268         exits(nil);
269 }
270
271 void
272 fcomp(void)
273 {
274         int     i;
275         Label   *lpt;
276         Rune    *tp;
277         SedCom  *pt, *pt1;
278         static Rune     *p = addspace;
279         static SedCom   **cmpend[DEPTH];        /* stack of {} operations */
280
281         while (rline(linebuf, lbend) >= 0) {
282                 cp = linebuf;
283 comploop:
284                 while(*cp == L' ' || *cp == L'\t')
285                         cp++;
286                 if(*cp == L'\0' || *cp == L'#')
287                         continue;
288                 if(*cp == L';') {
289                         cp++;
290                         goto comploop;
291                 }
292
293                 address(&rep->ad1);
294                 if (rep->ad1.type != A_NONE) {
295                         if (rep->ad1.type == A_DOL)
296                                 dollars++;
297                         if (rep->ad1.type == A_LAST) {
298                                 if (!lastre)
299                                         quit("First RE may not be null");
300                                 rep->ad1.type = A_RE;
301                                 rep->ad1.rp = lastre;
302                         }
303                         if(*cp == L',' || *cp == L';') {
304                                 cp++;
305                                 address(&rep->ad2);
306                                 if (rep->ad2.type == A_LAST) {
307                                         rep->ad2.type = A_RE;
308                                         rep->ad2.rp = lastre;
309                                 }
310                         } else
311                                 rep->ad2.type = A_NONE;
312                 }
313 swit:
314                 while(*cp == L' ' || *cp == L'\t')
315                         cp++;
316
317                 switch(*cp++) {
318                 default:
319                         quit("Unrecognized command: %S", linebuf);
320
321                 case '!':
322                         rep->negfl = 1;
323                         goto swit;
324
325                 case '{':
326                         rep->command = BCOM;
327                         rep->negfl = !rep->negfl;
328                         cmpend[depth++] = &rep->lb1;
329                         if(++rep >= pend)
330                                 quit("Too many commands: %S", linebuf);
331                         if(*cp == '\0')
332                                 continue;
333                         goto comploop;
334
335                 case '}':
336                         if(rep->ad1.type != A_NONE)
337                                 quit(AD0MES, linebuf);
338                         if(--depth < 0)
339                                 quit("Too many }'s");
340                         *cmpend[depth] = rep;
341                         if(*cp == 0)
342                                 continue;
343                         goto comploop;
344
345                 case '=':
346                         rep->command = EQCOM;
347                         if(rep->ad2.type != A_NONE)
348                                 quit(AD1MES, linebuf);
349                         break;
350
351                 case ':':
352                         if(rep->ad1.type != A_NONE)
353                                 quit(AD0MES, linebuf);
354
355                         while(*cp == L' ')
356                                 cp++;
357                         tp = lab->uninm;
358                         while (*cp && *cp != L';' && *cp != L' ' &&
359                             *cp != L'\t' && *cp != L'#') {
360                                 *tp++ = *cp++;
361                                 if(tp >= &lab->uninm[8])
362                                         quit(LTL, linebuf);
363                         }
364                         *tp = L'\0';
365
366                         if (*lab->uninm == L'\0')               /* no label? */
367                                 quit(CGMES, L":", linebuf);
368                         if(lpt = search(lab)) {
369                                 if(lpt->address)
370                                         quit("Duplicate labels: %S", linebuf);
371                         } else {
372                                 lab->chain = 0;
373                                 lpt = lab;
374                                 if(++lab >= labend)
375                                         quit("Too many labels: %S", linebuf);
376                         }
377                         lpt->address = rep;
378                         if (*cp == L'#')
379                                 continue;
380                         rep--;                  /* reuse this slot */
381                         break;
382
383                 case 'a':
384                         rep->command = ACOM;
385                         if(rep->ad2.type != A_NONE)
386                                 quit(AD1MES, linebuf);
387                         if(*cp == L'\\')
388                                 cp++;
389                         if(*cp++ != L'\n')
390                                 quit(CGMES, L"a", linebuf);
391                         rep->text = p;
392                         p = stext(p, addend);
393                         break;
394                 case 'c':
395                         rep->command = CCOM;
396                         if(*cp == L'\\')
397                                 cp++;
398                         if(*cp++ != L'\n')
399                                 quit(CGMES, L"c", linebuf);
400                         rep->text = p;
401                         p = stext(p, addend);
402                         break;
403                 case 'i':
404                         rep->command = ICOM;
405                         if(rep->ad2.type != A_NONE)
406                                 quit(AD1MES, linebuf);
407                         if(*cp == L'\\')
408                                 cp++;
409                         if(*cp++ != L'\n')
410                                 quit(CGMES, L"i", linebuf);
411                         rep->text = p;
412                         p = stext(p, addend);
413                         break;
414
415                 case 'g':
416                         rep->command = GCOM;
417                         break;
418
419                 case 'G':
420                         rep->command = CGCOM;
421                         break;
422
423                 case 'h':
424                         rep->command = HCOM;
425                         break;
426
427                 case 'H':
428                         rep->command = CHCOM;
429                         break;
430
431                 case 't':
432                         rep->command = TCOM;
433                         goto jtcommon;
434
435                 case 'b':
436                         rep->command = BCOM;
437 jtcommon:
438                         while(*cp == L' ')
439                                 cp++;
440                         if(*cp == L'\0' || *cp == L';') {
441                                 /* no label; jump to end */
442                                 if(pt = ltab[0].chain) {
443                                         while((pt1 = pt->lb1) != nil)
444                                                 pt = pt1;
445                                         pt->lb1 = rep;
446                                 } else
447                                         ltab[0].chain = rep;
448                                 break;
449                         }
450
451                         /* copy label into lab->uninm */
452                         tp = lab->uninm;
453                         while((*tp = *cp++) != L'\0' && *tp != L';')
454                                 if(++tp >= &lab->uninm[8])
455                                         quit(LTL, linebuf);
456                         cp--;
457                         *tp = L'\0';
458
459                         if (*lab->uninm == L'\0')
460                                 /* shouldn't get here */
461                                 quit(CGMES, L"b or t", linebuf);
462                         if((lpt = search(lab)) != nil) {
463                                 if(lpt->address)
464                                         rep->lb1 = lpt->address;
465                                 else {
466                                         for(pt = lpt->chain; pt != nil &&
467                                             (pt1 = pt->lb1) != nil; pt = pt1)
468                                                 ;
469                                         if (pt)
470                                                 pt->lb1 = rep;
471                                 }
472                         } else {                        /* add new label */
473                                 lab->chain = rep;
474                                 lab->address = 0;
475                                 if(++lab >= labend)
476                                         quit("Too many labels: %S", linebuf);
477                         }
478                         break;
479
480                 case 'n':
481                         rep->command = NCOM;
482                         break;
483
484                 case 'N':
485                         rep->command = CNCOM;
486                         break;
487
488                 case 'p':
489                         rep->command = PCOM;
490                         break;
491
492                 case 'P':
493                         rep->command = CPCOM;
494                         break;
495
496                 case 'r':
497                         rep->command = RCOM;
498                         if(rep->ad2.type != A_NONE)
499                                 quit(AD1MES, linebuf);
500                         if(*cp++ != L' ')
501                                 quit(CGMES, L"r", linebuf);
502                         rep->text = p;
503                         p = stext(p, addend);
504                         break;
505
506                 case 'd':
507                         rep->command = DCOM;
508                         break;
509
510                 case 'D':
511                         rep->command = CDCOM;
512                         rep->lb1 = pspace;
513                         break;
514
515                 case 'q':
516                         rep->command = QCOM;
517                         if(rep->ad2.type != A_NONE)
518                                 quit(AD1MES, linebuf);
519                         break;
520
521                 case 'l':
522                         rep->command = LCOM;
523                         break;
524
525                 case 's':
526                         rep->command = SCOM;
527                         seof = *cp++;
528                         if ((rep->re1 = compile()) == 0) {
529                                 if(!lastre)
530                                         quit("First RE may not be null");
531                                 rep->re1 = lastre;
532                         }
533                         rep->rhs = p;
534                         if((p = compsub(p, addend)) == 0)
535                                 quit(CGMES, L"s", linebuf);
536                         if(*cp == L'g') {
537                                 cp++;
538                                 rep->gfl++;
539                         } else if(gflag)
540                                 rep->gfl++;
541
542                         if(*cp == L'p') {
543                                 cp++;
544                                 rep->pfl = 1;
545                         }
546
547                         if(*cp == L'P') {
548                                 cp++;
549                                 rep->pfl = 2;
550                         }
551
552                         if(*cp == L'w') {
553                                 cp++;
554                                 if(*cp++ !=  L' ')
555                                         quit(CGMES, L"s", linebuf);
556                                 text(fname[nfiles]);
557                                 for(i = nfiles - 1; i >= 0; i--)
558                                         if(cmp(fname[nfiles], fname[i]) == 0) {
559                                                 rep->fcode = fcode[i];
560                                                 goto done;
561                                         }
562                                 if(nfiles >= MAXFILES)
563                                         quit("Too many files in w commands 1");
564                                 rep->fcode = open_file(fname[nfiles]);
565                         }
566                         break;
567
568                 case 'w':
569                         rep->command = WCOM;
570                         if(*cp++ != L' ')
571                                 quit(CGMES, L"w", linebuf);
572                         text(fname[nfiles]);
573                         for(i = nfiles - 1; i >= 0; i--)
574                                 if(cmp(fname[nfiles], fname[i]) == 0) {
575                                         rep->fcode = fcode[i];
576                                         goto done;
577                                 }
578                         if(nfiles >= MAXFILES){
579                                 fprint(2, "sed: Too many files in w commands 2 \n");
580                                 fprint(2, "nfiles = %d; MAXF = %d\n",
581                                         nfiles, MAXFILES);
582                                 errexit();
583                         }
584                         rep->fcode = open_file(fname[nfiles]);
585                         break;
586
587                 case 'x':
588                         rep->command = XCOM;
589                         break;
590
591                 case 'y':
592                         rep->command = YCOM;
593                         seof = *cp++;
594                         if (ycomp(rep) == 0)
595                                 quit(CGMES, L"y", linebuf);
596                         break;
597
598                 }
599 done:
600                 if(++rep >= pend)
601                         quit("Too many commands, last: %S", linebuf);
602                 if(*cp++ != L'\0') {
603                         if(cp[-1] == L';')
604                                 goto comploop;
605                         quit(CGMES, cp - 1, linebuf);
606                 }
607         }
608 }
609
610 Biobuf *
611 open_file(char *name)
612 {
613         int fd;
614         Biobuf *bp;
615
616         if ((bp = malloc(sizeof(Biobuf))) == 0)
617                 quit("Out of memory");
618         if ((fd = open(name, OWRITE)) < 0 &&
619             (fd = create(name, OWRITE, 0666)) < 0)
620                 quit("Cannot create %s", name);
621         Binit(bp, fd, OWRITE);
622         Blethal(bp, nil);
623         Bseek(bp, 0, 2);
624         fcode[nfiles++] = bp;
625         return bp;
626 }
627
628 Rune *
629 compsub(Rune *rhs, Rune *end)
630 {
631         Rune r;
632
633         while ((r = *cp++) != '\0') {
634                 if(r == '\\') {
635                         if (rhs < end)
636                                 *rhs++ = Runemax;
637                         else
638                                 return 0;
639                         r = *cp++;
640                         if(r == 'n')
641                                 r = '\n';
642                 } else {
643                         if(r == seof) {
644                                 if (rhs < end)
645                                         *rhs++ = '\0';
646                                 else
647                                         return 0;
648                                 return rhs;
649                         }
650                 }
651                 if (rhs < end)
652                         *rhs++ = r;
653                 else
654                         return 0;
655         }
656         return 0;
657 }
658
659 Reprog *
660 compile(void)
661 {
662         Rune c;
663         char *ep;
664         char expbuf[512];
665
666         if((c = *cp++) == seof)         /* L'//' */
667                 return 0;
668         ep = expbuf;
669         do {
670                 if (c == L'\0' || c == L'\n')
671                         quit(TMMES, linebuf);
672                 if (c == L'\\') {
673                         if (ep >= expbuf+sizeof(expbuf))
674                                 quit(TMMES, linebuf);
675                         ep += runetochar(ep, &c);
676                         if ((c = *cp++) == L'n')
677                                 c = L'\n';
678                 }
679                 if (ep >= expbuf + sizeof(expbuf))
680                         quit(TMMES, linebuf);
681                 ep += runetochar(ep, &c);
682         } while ((c = *cp++) != seof);
683         *ep = 0;
684         return lastre = regcomp(expbuf);
685 }
686
/*
 * Error hook with the signature the regexp library expects; this file
 * never calls it directly.  The library's message s is discarded and
 * the line being compiled is reported as a garbled command instead.
 */
void
regerror(char *s)
{
	USED(s);
	quit(CGMES, L"r.e.-using", linebuf);
}
693
694 int
695 flushout(Biobufhdr *bp, void *v, long n)
696 {
697         int i;
698         
699         for(i = 0; i < nfiles; i++)
700                 Bflush(fcode[i]);
701         return read(bp->fid, v, n);
702 }
703
704 void
705 newfile(enum PTYPE type, char *name)
706 {
707         if (type == P_ARG)
708                 prog.curr = name;
709         else {
710                 if ((prog.bp = Bopen(name, OREAD)) == 0)
711                         quit("Cannot open pattern-file: %s\n", name);
712                 Blethal(prog.bp, nil);
713                 if(uflag) Biofn(prog.bp, flushout);
714         }
715         prog.type = type;
716 }
717
718 int
719 rline(Rune *buf, Rune *end)
720 {
721         long c;
722         Rune r;
723
724         while ((c = getrune()) >= 0) {
725                 r = c;
726                 if (r == '\\') {
727                         if (buf <= end)
728                                 *buf++ = r;
729                         if ((c = getrune()) < 0)
730                                 break;
731                         r = c;
732                 } else if (r == '\n') {
733                         *buf = '\0';
734                         return 1;
735                 }
736                 if (buf <= end)
737                         *buf++ = r;
738         }
739         *buf = '\0';
740         return -1;
741 }
742
743 long
744 getrune(void)
745 {
746         long c;
747         Rune r;
748         char *p;
749
750         if (prog.type == P_ARG) {
751                 if ((p = prog.curr) != 0) {
752                         if (*p) {
753                                 prog.curr += chartorune(&r, p);
754                                 c = r;
755                         } else {
756                                 c = '\n';       /* fake an end-of-line */
757                                 prog.curr = 0;
758                         }
759                 } else
760                         c = -1;
761         } else if ((c = Bgetrune(prog.bp)) < 0)
762                 Bterm(prog.bp);
763         return c;
764 }
765
766 void
767 address(Addr *ap)
768 {
769         int c;
770         long lno;
771
772         if((c = *cp++) == '$')
773                 ap->type = A_DOL;
774         else if(c == '/') {
775                 seof = c;
776                 if (ap->rp = compile())
777                         ap->type = A_RE;
778                 else
779                         ap->type = A_LAST;
780         }
781         else if (c >= '0' && c <= '9') {
782                 lno = c - '0';
783                 while ((c = *cp) >= '0' && c <= '9')
784                         lno = lno*10 + *cp++ - '0';
785                 if(!lno)
786                         quit("line number 0 is illegal",0);
787                 ap->type = A_LINE;
788                 ap->line = lno;
789         }
790         else {
791                 cp--;
792                 ap->type = A_NONE;
793         }
794 }
795
/*
 * Compare two NUL-terminated character strings.
 * Returns 0 when they are identical and 1 otherwise; unlike strcmp
 * the result carries no ordering information.
 */
int
cmp(char *a, char *b)
{
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return 1;
}
805 rcmp(Rune *a, Rune *b)          /* compare runes */
806 {
807         while(*a == *b++)
808                 if (*a == '\0')
809                         return 0;
810                 else
811                         a++;
812         return 1;
813 }
814
815 char *
816 text(char *p)           /* extract character string */
817 {
818         Rune r;
819
820         while(*cp == ' ' || *cp == '\t')
821                 cp++;
822         while (*cp) {
823                 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
824                         break;
825                 if (r == '\n')
826                         while (*cp == ' ' || *cp == '\t')
827                                 cp++;
828                 p += runetochar(p, &r);
829         }
830         *p++ = '\0';
831         return p;
832 }
833
834 Rune *
835 stext(Rune *p, Rune *end)               /* extract rune string */
836 {
837         while(*cp == L' ' || *cp == L'\t')
838                 cp++;
839         while (*cp) {
840                 if (*cp == L'\\' && *++cp == L'\0')
841                         break;
842                 if (p >= end-1)
843                         quit(TMMES, linebuf);
844                 if ((*p++ = *cp++) == L'\n')
845                         while(*cp == L' ' || *cp == L'\t')
846                                 cp++;
847         }
848         *p++ = 0;
849         return p;
850 }
851
852
853 Label *
854 search(Label *ptr)
855 {
856         Label   *rp;
857
858         for (rp = ltab; rp < ptr; rp++)
859                 if(rcmp(rp->uninm, ptr->uninm) == 0)
860                         return(rp);
861         return(0);
862 }
863
864 void
865 dechain(void)
866 {
867         Label   *lptr;
868         SedCom  *rptr, *trptr;
869
870         for(lptr = ltab; lptr < lab; lptr++) {
871                 if(lptr->address == 0)
872                         quit("Undefined label: %S", lptr->uninm);
873                 if(lptr->chain) {
874                         rptr = lptr->chain;
875                         while((trptr = rptr->lb1) != nil) {
876                                 rptr->lb1 = lptr->address;
877                                 rptr = trptr;
878                         }
879                         rptr->lb1 = lptr->address;
880                 }
881         }
882 }
883
884 int
885 ycomp(SedCom *r)
886 {
887         int i;
888         Rune *rp, *sp, *tsp;
889         Rune c, highc;
890
891         highc = 0;
892         for(tsp = cp; *tsp != seof; tsp++) {
893                 if(*tsp == L'\\')
894                         tsp++;
895                 if(*tsp == L'\n' || *tsp == L'\0')
896                         return 0;
897                 if (*tsp > highc)
898                         highc = *tsp;
899         }
900         tsp++;
901         if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
902                 quit("Out of memory");
903         *rp++ = highc;                          /* save upper bound */
904         for (i = 0; i <= highc; i++)
905                 rp[i] = i;
906         sp = cp;
907         while((c = *sp++) != seof) {
908                 if(c == L'\\' && *sp == L'n') {
909                         sp++;
910                         c = L'\n';
911                 }
912                 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
913                         rp[c] = L'\n';
914                         tsp++;
915                 }
916                 if(rp[c] == seof || rp[c] == L'\0') {
917                         free(r->re1);
918                         r->re1 = nil;
919                         return 0;
920                 }
921         }
922         if(*tsp != seof) {
923                 free(r->re1);
924                 r->re1 = nil;
925                 return 0;
926         }
927         cp = tsp+1;
928         return 1;
929 }
930
931 void
932 execute(void)
933 {
934         SedCom  *ipc;
935
936         while (spend = gline(linebuf)){
937                 for(ipc = pspace; ipc->command; ) {
938                         if (!executable(ipc)) {
939                                 ipc++;
940                                 continue;
941                         }
942                         command(ipc);
943
944                         if(delflag)
945                                 break;
946                         if(jflag) {
947                                 jflag = 0;
948                                 if((ipc = ipc->lb1) == 0)
949                                         break;
950                         } else
951                                 ipc++;
952                 }
953                 if(!nflag && !delflag)
954                         putline(&fout, linebuf, spend - linebuf);
955                 if(aptr > abuf)
956                         arout();
957                 delflag = 0;
958         }
959 }
960
961 /* determine if a statement should be applied to an input line */
962 int
963 executable(SedCom *ipc)
964 {
965         if (ipc->active) {      /* Addr1 satisfied - accept until Addr2 */
966                 if (ipc->active == 1)           /* Second line */
967                         ipc->active = 2;
968                 switch(ipc->ad2.type) {
969                 case A_NONE:            /* No second addr; use first */
970                         ipc->active = 0;
971                         break;
972                 case A_DOL:             /* Accept everything */
973                         return !ipc->negfl;
974                 case A_LINE:            /* Line at end of range? */
975                         if (lnum <= ipc->ad2.line) {
976                                 if (ipc->ad2.line == lnum)
977                                         ipc->active = 0;
978                                 return !ipc->negfl;
979                         }
980                         ipc->active = 0;        /* out of range */
981                         return ipc->negfl;
982                 case A_RE:              /* Check for matching R.E. */
983                         if (match(ipc->ad2.rp, linebuf))
984                                 ipc->active = 0;
985                         return !ipc->negfl;
986                 default:
987                         quit("Internal error");
988                 }
989         }
990         switch (ipc->ad1.type) {        /* Check first address */
991         case A_NONE:                    /* Everything matches */
992                 return !ipc->negfl;
993         case A_DOL:                     /* Only last line */
994                 if (dolflag)
995                         return !ipc->negfl;
996                 break;
997         case A_LINE:                    /* Check line number */
998                 if (ipc->ad1.line == lnum) {
999                         ipc->active = 1;        /* In range */
1000                         return !ipc->negfl;
1001                 }
1002                 break;
1003         case A_RE:                      /* Check R.E. */
1004                 if (match(ipc->ad1.rp, linebuf)) {
1005                         ipc->active = 1;        /* In range */
1006                         return !ipc->negfl;
1007                 }
1008                 break;
1009         default:
1010                 quit("Internal error");
1011         }
1012         return ipc->negfl;
1013 }
1014
1015 int
1016 match(Reprog *pattern, Rune *buf)
1017 {
1018         if (!pattern)
1019                 return 0;
1020         subexp[0].rsp = buf;
1021         subexp[0].ep = 0;
1022         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
1023                 loc1 = subexp[0].rsp;
1024                 loc2 = subexp[0].rep;
1025                 return 1;
1026         }
1027         loc1 = loc2 = 0;
1028         return 0;
1029 }
1030
1031 int
1032 substitute(SedCom *ipc)
1033 {
1034         int len;
1035
1036         if(!match(ipc->re1, linebuf))
1037                 return 0;
1038
1039         /*
1040          * we have at least one match.  some patterns, e.g. '$' or '^', can
1041          * produce 0-length matches, so during a global substitute we must
1042          * bump to the character after a 0-length match to keep from looping.
1043          */
1044         sflag = 1;
1045         if(ipc->gfl == 0)                       /* single substitution */
1046                 dosub(ipc->rhs);
1047         else
1048                 do{                             /* global substitution */
1049                         len = loc2 - loc1;      /* length of match */
1050                         dosub(ipc->rhs);        /* dosub moves loc2 */
1051                         if(*loc2 == 0)          /* end of string */
1052                                 break;
1053                         if(len == 0)            /* zero-length R.E. match */
1054                                 loc2++;         /* bump over 0-length match */
1055                         if(*loc2 == 0)          /* end of string */
1056                                 break;
1057                 } while(match(ipc->re1, loc2));
1058         return 1;
1059 }
1060
1061 void
1062 dosub(Rune *rhsbuf)
1063 {
1064         int c, n;
1065         Rune *lp, *sp, *rp;
1066
1067         lp = linebuf;
1068         sp = genbuf;
1069         rp = rhsbuf;
1070         while (lp < loc1)
1071                 *sp++ = *lp++;
1072         while(c = *rp++) {
1073                 if (c == '&') {
1074                         sp = place(sp, loc1, loc2);
1075                         continue;
1076                 }
1077                 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1078                         n = c-'0';
1079                         if (subexp[n].rsp && subexp[n].rep) {
1080                                 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1081                                 continue;
1082                         }
1083                         else {
1084                                 quit("Invalid back reference \\%d", n);
1085                         }
1086                 }
1087                 *sp++ = c;
1088                 if (sp >= &genbuf[LBSIZE])
1089                         quit("Output line too long");
1090         }
1091         lp = loc2;
1092         loc2 = sp - genbuf + linebuf;
1093         while (*sp++ = *lp++)
1094                 if (sp >= &genbuf[LBSIZE])
1095                         quit("Output line too long");
1096         lp = linebuf;
1097         sp = genbuf;
1098         while (*lp++ = *sp++)
1099                 ;
1100         spend = lp - 1;
1101 }
1102
1103 Rune *
1104 place(Rune *sp, Rune *l1, Rune *l2)
1105 {
1106         while (l1 < l2) {
1107                 *sp++ = *l1++;
1108                 if (sp >= &genbuf[LBSIZE])
1109                         quit("Output line too long");
1110         }
1111         return sp;
1112 }
1113
/*
 * Return a printable escape sequence for rune c.
 *
 * Backspace, newline, carriage return, tab and backslash map to the
 * usual two-character escapes.  Anything else becomes "\x" followed by
 * lower-case hex digits: four for values up to 0xFFFF and six for
 * larger ones, so that runes beyond 16 bits are not shown as a
 * different, truncated code point.
 *
 * The hex form lives in a static buffer that the next call overwrites.
 */
char *
trans(int c)
{
	static char buf[sizeof "\\x000000"];
	static char hex[] = "0123456789abcdef";
	unsigned int u;
	int shift;
	char *p;

	switch(c) {
	case '\b':
		return "\\b";
	case '\n':
		return "\\n";
	case '\r':
		return "\\r";
	case '\t':
		return "\\t";
	case '\\':
		return "\\\\";
	}
	u = c;
	p = buf;
	*p++ = '\\';
	*p++ = 'x';
	/* start at bit 20 only when something above bit 15 is set */
	shift = (u & ~0xFFFFu) != 0 ? 20 : 12;
	for(; shift >= 0; shift -= 4)
		*p++ = hex[(u >> shift) & 0xF];
	*p = '\0';
	return buf;
}
1138
1139 void
1140 command(SedCom *ipc)
1141 {
1142         int i, c;
1143         char *ucp;
1144         Rune *execp, *p1, *p2, *rp;
1145
1146         switch(ipc->command) {
1147         case ACOM:
1148                 *aptr++ = ipc;
1149                 if(aptr >= abuf+MAXADDS)
1150                         quit("Too many appends after line %ld", lnum);
1151                 *aptr = 0;
1152                 break;
1153         case CCOM:
1154                 delflag = 1;
1155                 if(ipc->active == 1) {
1156                         for(rp = ipc->text; *rp; rp++)
1157                                 Bputrune(&fout, *rp);
1158                         Bputc(&fout, '\n');
1159                 }
1160                 break;
1161         case DCOM:
1162                 delflag++;
1163                 break;
1164         case CDCOM:
1165                 p1 = p2 = linebuf;
1166                 while(*p1 != '\n') {
1167                         if(*p1++ == 0) {
1168                                 delflag++;
1169                                 return;
1170                         }
1171                 }
1172                 p1++;
1173                 while(*p2++ = *p1++)
1174                         ;
1175                 spend = p2 - 1;
1176                 jflag++;
1177                 break;
1178         case EQCOM:
1179                 Bprint(&fout, "%ld\n", lnum);
1180                 break;
1181         case GCOM:
1182                 p1 = linebuf;
1183                 p2 = holdsp;
1184                 while(*p1++ = *p2++)
1185                         ;
1186                 spend = p1 - 1;
1187                 break;
1188         case CGCOM:
1189                 *spend++ = '\n';
1190                 p1 = spend;
1191                 p2 = holdsp;
1192                 while(*p1++ = *p2++)
1193                         if(p1 >= lbend)
1194                                 break;
1195                 spend = p1 - 1;
1196                 break;
1197         case HCOM:
1198                 p1 = holdsp;
1199                 p2 = linebuf;
1200                 while(*p1++ = *p2++);
1201                 hspend = p1 - 1;
1202                 break;
1203         case CHCOM:
1204                 *hspend++ = '\n';
1205                 p1 = hspend;
1206                 p2 = linebuf;
1207                 while(*p1++ = *p2++)
1208                         if(p1 >= hend)
1209                                 break;
1210                 hspend = p1 - 1;
1211                 break;
1212         case ICOM:
1213                 for(rp = ipc->text; *rp; rp++)
1214                         Bputrune(&fout, *rp);
1215                 Bputc(&fout, '\n');
1216                 break;
1217         case BCOM:
1218                 jflag = 1;
1219                 break;
1220         case LCOM:
1221                 c = 0;
1222                 for (i = 0, rp = linebuf; *rp; rp++) {
1223                         c = *rp;
1224                         if(c >= 0x20 && c < 0x7F && c != '\\') {
1225                                 Bputc(&fout, c);
1226                                 if(i++ > 71) {
1227                                         Bprint(&fout, "\\\n");
1228                                         i = 0;
1229                                 }
1230                         } else {
1231                                 for (ucp = trans(*rp); *ucp; ucp++){
1232                                         c = *ucp;
1233                                         Bputc(&fout, c);
1234                                         if(i++ > 71) {
1235                                                 Bprint(&fout, "\\\n");
1236                                                 i = 0;
1237                                         }
1238                                 }
1239                         }
1240                 }
1241                 if(c == ' ')
1242                         Bprint(&fout, "\\n");
1243                 Bputc(&fout, '\n');
1244                 break;
1245         case NCOM:
1246                 if(!nflag)
1247                         putline(&fout, linebuf, spend-linebuf);
1248
1249                 if(aptr > abuf)
1250                         arout();
1251                 if((execp = gline(linebuf)) == 0) {
1252                         delflag = 1;
1253                         break;
1254                 }
1255                 spend = execp;
1256                 break;
1257         case CNCOM:
1258                 if(aptr > abuf)
1259                         arout();
1260                 *spend++ = '\n';
1261                 if((execp = gline(spend)) == 0) {
1262                         delflag = 1;
1263                         break;
1264                 }
1265                 spend = execp;
1266                 break;
1267         case PCOM:
1268                 putline(&fout, linebuf, spend-linebuf);
1269                 break;
1270         case CPCOM:
1271 cpcom:
1272                 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1273                         Bputc(&fout, *rp);
1274                 Bputc(&fout, '\n');
1275                 break;
1276         case QCOM:
1277                 if(!nflag)
1278                         putline(&fout, linebuf, spend-linebuf);
1279                 if(aptr > abuf)
1280                         arout();
1281                 exits(nil);
1282         case RCOM:
1283                 *aptr++ = ipc;
1284                 if(aptr >= &abuf[MAXADDS])
1285                         quit("Too many reads after line %ld", lnum);
1286                 *aptr = 0;
1287                 break;
1288         case SCOM:
1289                 i = substitute(ipc);
1290                 if(i && ipc->pfl)
1291                         if(ipc->pfl == 1)
1292                                 putline(&fout, linebuf, spend-linebuf);
1293                         else
1294                                 goto cpcom;
1295                 if(i && ipc->fcode)
1296                         goto wcom;
1297                 break;
1298
1299         case TCOM:
1300                 if(sflag) {
1301                         sflag = 0;
1302                         jflag = 1;
1303                 }
1304                 break;
1305
1306         case WCOM:
1307 wcom:
1308                 putline(ipc->fcode,linebuf, spend - linebuf);
1309                 break;
1310         case XCOM:
1311                 p1 = linebuf;
1312                 p2 = genbuf;
1313                 while(*p2++ = *p1++)
1314                         ;
1315                 p1 = holdsp;
1316                 p2 = linebuf;
1317                 while(*p2++ = *p1++)
1318                         ;
1319                 spend = p2 - 1;
1320                 p1 = genbuf;
1321                 p2 = holdsp;
1322                 while(*p2++ = *p1++)
1323                         ;
1324                 hspend = p2 - 1;
1325                 break;
1326         case YCOM:
1327                 p1 = linebuf;
1328                 p2 = ipc->text;
1329                 for (i = *p2++; *p1; p1++)
1330                         if (*p1 <= i)
1331                                 *p1 = p2[*p1];
1332                 break;
1333         }
1334 }
1335
1336 void
1337 putline(Biobuf *bp, Rune *buf, int n)
1338 {
1339         while (n--)
1340                 Bputrune(bp, *buf++);
1341         Bputc(bp, '\n');
1342 }
1343
1344 void
1345 arout(void)
1346 {
1347         int     c;
1348         char    *s, *e;
1349         char    buf[128];
1350         Rune    *p1;
1351         Biobuf  *fi;
1352
1353         for (aptr = abuf; *aptr; aptr++) {
1354                 if((*aptr)->command == ACOM) {
1355                         for(p1 = (*aptr)->text; *p1; p1++ )
1356                                 Bputrune(&fout, *p1);
1357                         Bputc(&fout, '\n');
1358                 } else {
1359                         for(s = buf, e = buf+sizeof(buf)-UTFmax-1, p1 = (*aptr)->text; *p1 && s < e; p1++)
1360                                 s += runetochar(s, p1);
1361                         *s = '\0';
1362                         if((fi = Bopen(buf, OREAD)) == 0)
1363                                 continue;
1364                         Blethal(fi, nil);
1365                         if(uflag) Biofn(fi, flushout);
1366                         while((c = Bgetc(fi)) >= 0)
1367                                 Bputc(&fout, c);
1368                         Bterm(fi);
1369                 }
1370         }
1371         aptr = abuf;
1372         *aptr = 0;
1373 }
1374
/*
 * Terminate the program with the exit status string "error".
 */
void
errexit(void)
{
	exits("error");
}
1380
/*
 * Report a fatal error and exit.
 *
 * The message is "sed: ", the formatted arguments, then a newline,
 * assembled in a 256-byte buffer and written to file descriptor 2
 * with a single write before errexit is called.
 */
void
quit(char *fmt, ...)
{
	char msg[256];
	char *cur, *end;
	va_list arg;

	end = msg + sizeof msg;
	cur = seprint(msg, end, "sed: ");
	va_start(arg, fmt);
	cur = vseprint(cur, end, fmt, arg);
	va_end(arg);
	cur = seprint(cur, end, "\n");
	write(2, msg, cur - msg);
	errexit();
}
1397
1398 Rune *
1399 gline(Rune *addr)
1400 {
1401         long c;
1402         Rune *p;
1403         static long peekc = 0;
1404
1405         if (f == 0 && opendata() < 0)
1406                 return 0;
1407         sflag = 0;
1408         lnum++;
1409 /*      Bflush(&fout);********* dumped 4/30/92 - bobf****/
1410         do {
1411                 p = addr;
1412                 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1413                         if (c == '\n') {
1414                                 if (dollars != 0 && (peekc = Bgetrune(f)) < 0 && fhead == nil)
1415                                         dolflag = 1;
1416                                 *p = '\0';
1417                                 return p;
1418                         }
1419                         if (c && p < lbend)
1420                                 *p++ = c;
1421                 }
1422                 /* return partial final line, adding implicit newline */
1423                 if(p != addr) {
1424                         *p = '\0';
1425                         peekc = -1;
1426                         if (fhead == nil)
1427                                 dolflag = 1;
1428                         return p;
1429                 }
1430                 peekc = 0;
1431                 Bterm(f);
1432         } while (opendata() > 0);               /* Switch to next stream */
1433         f = 0;
1434         return 0;
1435 }
1436
1437 /*
1438  * Data file input section - the intent is to transparently
1439  *      catenate all data input streams.
1440  */
1441 void
1442 enroll(char *filename)          /* Add a file to the input file cache */
1443 {
1444         FileCache *fp;
1445
1446         if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1447                 quit("Out of memory");
1448         if (ftail == nil)
1449                 fhead = fp;
1450         else
1451                 ftail->next = fp;
1452         ftail = fp;
1453         fp->next = nil;
1454         fp->name = filename;            /* 0 => stdin */
1455 }
1456
1457 int
1458 opendata(void)
1459 {
1460         if (fhead == nil)
1461                 return -1;
1462         if (fhead->name) {
1463                 if ((f = Bopen(fhead->name, OREAD)) == nil)
1464                         quit("Can't open %s", fhead->name);
1465         } else {
1466                 Binit(&stdin, 0, OREAD);
1467                 f = &stdin;
1468         }
1469         Blethal(f, nil);
1470         if(uflag) Biofn(f, flushout);
1471         fhead = fhead->next;
1472         return 1;
1473 }