]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/sed.c
Add exponential function.
[plan9front.git] / sys / src / cmd / sed.c
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
/* Fixed capacities of the static tables and buffers declared in this file. */
9 enum {
10         DEPTH           = 20,           /* max nesting depth of {} (size of fcomp's cmpend stack) */
11         MAXCMDS         = 512,          /* max sed commands (size of pspace) */
12         ADDSIZE         = 10000,        /* size, in runes, of the add & read text buffer (addspace) */
13         MAXADDS         = 20,           /* max pending adds and reads (size of abuf) */
14         LBSIZE          = 8192,         /* input line size in runes (linebuf, holdsp, genbuf) */
15         LABSIZE         = 50,           /* max number of labels (size of ltab) */
16         MAXSUB          = 10,           /* max number of sub reg exp (size of subexp) */
17         MAXFILES        = 120,          /* max output files (rows of fname and fcode) */
18 };
19
20 /*
21  * An address is a line #, a R.E., "$", a reference to the last
22  * R.E., or nothing.
23  */
/* One address of a command; `type` selects which union member (if any) is valid. */
24 typedef struct {
25         enum {
26                 A_NONE,         /* no address given */
27                 A_DOL,          /* "$" */
28                 A_LINE,         /* line number, stored in `line` */
29                 A_RE,           /* regular expression, stored in `rp` */
30                 A_LAST,         /* empty R.E.; fcomp rewrites it to A_RE using lastre */
31         }type;
32         union {
33                 long    line;           /* Line # */
34                 Reprog  *rp;            /* Compiled R.E. */
35         };
36 } Addr;
37
/*
 * One compiled command.  fcomp fills these in order into pspace[].
 * The anonymous union is overlaid: which member is meaningful depends
 * on `command` (re1 for s, text for a/c/i/r/y, lb1 for b/t/{/D).
 */
38 typedef struct  SEDCOM {
39         Addr    ad1;                    /* optional start address */
40         Addr    ad2;                    /* optional end address */
41         union {
42                 Reprog  *re1;           /* compiled R.E. */
43                 Rune    *text;          /* added text or file name */
44                 struct  SEDCOM  *lb1;   /* destination command of branch */
45         };
46         Rune    *rhs;                   /* Right-hand side of substitution */
47         Biobuf* fcode;                  /* File ID for read and write */
48         char    command;                /* command code -see below */
49         char    gfl;                    /* 'Global' flag for substitutions */
50         char    pfl;                    /* 'print' flag for substitutions: 1 for p, 2 for P */
51         char    active;                 /* 1 => data between start and end */
52         char    negfl;                  /* negation flag (set by '!', toggled by '{') */
53 } SedCom;
54
55 /* Command Codes for field SedCom.command */
56 #define ACOM    01      /* a */
57 #define BCOM    020     /* b, and the implicit branch compiled for { */
58 #define CCOM    02      /* c */
59 #define CDCOM   025     /* D */
60 #define CNCOM   022     /* N */
61 #define COCOM   017     /* not assigned by any fcomp case */
62 #define CPCOM   023     /* P */
63 #define DCOM    03      /* d */
64 #define ECOM    015     /* not assigned by any fcomp case */
65 #define EQCOM   013     /* = */
66 #define FCOM    016     /* not assigned by any fcomp case */
67 #define GCOM    027     /* g */
68 #define CGCOM   030     /* G */
69 #define HCOM    031     /* h */
70 #define CHCOM   032     /* H */
71 #define ICOM    04      /* i */
72 #define LCOM    05      /* l */
73 #define NCOM    012     /* n */
74 #define PCOM    010     /* p */
75 #define QCOM    011     /* q */
76 #define RCOM    06      /* r */
77 #define SCOM    07      /* s */
78 #define TCOM    021     /* t */
79 #define WCOM    014     /* w */
80 #define CWCOM   024     /* not assigned by any fcomp case */
81 #define YCOM    026     /* y */
82 #define XCOM    033     /* x */
83
/*
 * A branch label.  Until the label is defined, branches that name it are
 * linked through their lb1 fields starting at `chain`; dechain() later
 * points every chained command at `address`.
 */
84 typedef struct label {                  /* Label symbol table */
85         Rune    uninm[9];               /* Label name: at most 8 runes plus the terminator */
86         SedCom  *chain;                 /* Commands branching here before it was defined */
87         SedCom  *address;               /* Command associated with label */
88 } Label;
89
/* Node of a singly linked list of file names (head and tail are fhead/ftail). */
90 typedef struct  FILE_CACHE {            /* Data file control block */
91         struct FILE_CACHE *next;        /* Forward Link */
92         char    *name;                  /* Name of file */
93 } FileCache;
94
/* Global state shared by the script compiler (fcomp and helpers) and the executor. */
95 SedCom pspace[MAXCMDS];                 /* Command storage */
96 SedCom *pend = pspace+MAXCMDS;          /* End of command storage */
97 SedCom *rep = pspace;                   /* Current fill point */
98
99 Reprog  *lastre = 0;                    /* Last regular expression compiled by compile() */
100 Resub   subexp[MAXSUB];                 /* sub-patterns of pattern match*/
101
102 Rune    addspace[ADDSIZE];              /* Buffer for a, c, & i text, r file names and s right-hand sides */
103 Rune    *addend = addspace+ADDSIZE;
104
105 SedCom  *abuf[MAXADDS];                 /* Queue of pending adds & reads */
106 SedCom  **aptr = abuf;
107
108 struct {                                /* Sed program input control block */
109         enum PTYPE {                    /* Either on command line or in file */
110                 P_ARG,
111                 P_FILE,
112         } type;
113         union PCTL {                    /* Pointer to data */
114                 Biobuf  *bp;            /* P_FILE: open script file */
115                 char    *curr;          /* P_ARG: next unread byte of the script argument */
116         };
117 } prog;
118
119 Rune    genbuf[LBSIZE];                 /* Miscellaneous buffer */
120
121 FileCache       *fhead = 0;             /* Head of File Cache Chain */
122 FileCache       *ftail = 0;             /* Tail of File Cache Chain */
123
124 Rune    *loc1;                          /* Start of pattern match */
125 Rune    *loc2;                          /* End of pattern match */
126 Rune    seof;                           /* Pattern delimiter char */
127
128 Rune    linebuf[LBSIZE+1];              /* Input data buffer; also holds each script line while compiling */
129 Rune    *lbend = linebuf+LBSIZE;        /* End of buffer */
130 Rune    *spend = linebuf;               /* End of input data */
131 Rune    *cp;                            /* Current scan point in linebuf */
132
133 Rune    holdsp[LBSIZE+1];               /* Hold buffer */
134 Rune    *hend = holdsp+LBSIZE;          /* End of hold buffer */
135 Rune    *hspend = holdsp;               /* End of hold data */
136
137 int     nflag;                          /* Command line flags: -n */
138 int     gflag;                          /* -g: fcomp sets gfl on every s command */
139
140 int     dolflag;                        /* Set when at true EOF */
141 int     sflag;                          /* Set when substitution done */
142 int     jflag;                          /* Set when jump required */
143 int     delflag;                        /* Delete current line when set */
144
145 long    lnum = 0;                       /* Input line count */
146
147 char    fname[MAXFILES][40];            /* File name cache */
148 Biobuf  *fcode[MAXFILES];               /* File ID cache; slot 0 is set to &fout by main */
149 int     nfiles = 0;                     /* Cache fill point */
150
151 Biobuf  fout;                           /* Output stream */
152 Biobuf  stdin;                          /* Default input */
153 Biobuf* f = 0;                          /* Input data */
154
155 Label   ltab[LABSIZE];                  /* Label name symbol table */
156 Label   *labend = ltab+LABSIZE;         /* End of label table */
157 Label   *lab = ltab+1;                  /* Current Fill point; ltab[0] is the unnamed "end of script" target */
158
159 int     depth = 0;                      /* {} stack pointer */
160
161 Rune    bad;                            /* Dummy err ptr reference */
162 Rune    *badp = &bad;
163
164
/*
 * Format strings handed to quit().  Every call site in fcomp and its
 * helpers supplies Rune strings (an L"..." literal and/or linebuf) for
 * the %S verbs.
 */
165 char    CGMES[]  =      "%S command garbled: %S";
166 char    TMMES[]  =      "Too much text: %S";
167 char    LTL[]    =      "Label too long: %S";
168 char    AD0MES[] =      "No addresses allowed: %S";
169 char    AD1MES[] =      "Only one address allowed: %S";
170
/* Forward declarations for the functions defined in this file. */
171 void    address(Addr *);
172 void    arout(void);
173 int     cmp(char *, char *);
174 int     rcmp(Rune *, Rune *);
175 void    command(SedCom *);
176 Reprog  *compile(void);
177 Rune    *compsub(Rune *, Rune *);
178 void    dechain(void);
179 void    dosub(Rune *);
180 int     ecmp(Rune *, Rune *, int);
181 void    enroll(char *);
182 void    errexit(void);
183 int     executable(SedCom *);
184 void    execute(void);
185 void    fcomp(void);
186 long    getrune(void);
187 Rune    *gline(Rune *);
188 int     match(Reprog *, Rune *);
189 void    newfile(enum PTYPE, char *);
190 int     opendata(void);
191 Biobuf  *open_file(char *);
192 Rune    *place(Rune *, Rune *, Rune *);
193 void    quit(char *, ...);
194 int     rline(Rune *, Rune *);
195 Label   *search(Label *);
196 int     substitute(SedCom *);
197 char    *text(char *);
198 Rune    *stext(Rune *, Rune *);
199 int     ycomp(SedCom *);
200 char *  trans(int c);
201 void    putline(Biobuf *bp, Rune *buf, int n);
202
203 void
204 main(int argc, char **argv)
205 {
206         int compfl;
207
208         lnum = 0;
209         Binit(&fout, 1, OWRITE);
210         Blethal(&fout, nil);
211         fcode[nfiles++] = &fout;
212         compfl = 0;
213
214         if(argc == 1)
215                 exits(0);
216         ARGBEGIN{
217         case 'e':
218                 if (argc <= 1)
219                         quit("missing pattern");
220                 newfile(P_ARG, ARGF());
221                 fcomp();
222                 compfl = 1;
223                 continue;
224         case 'f':
225                 if(argc <= 1)
226                         quit("no pattern-file");
227                 newfile(P_FILE, ARGF());
228                 fcomp();
229                 compfl = 1;
230                 continue;
231         case 'g':
232                 gflag++;
233                 continue;
234         case 'n':
235                 nflag++;
236                 continue;
237         default:
238                 fprint(2, "sed: Unknown flag: %c\n", ARGC());
239                 continue;
240         } ARGEND
241
242         if(compfl == 0) {
243                 if (--argc < 0)
244                         quit("missing pattern");
245                 newfile(P_ARG, *argv++);
246                 fcomp();
247         }
248
249         if(depth)
250                 quit("Too many {'s");
251
252         ltab[0].address = rep;
253
254         dechain();
255
256         if(argc <= 0)
257                 enroll(0);              /* Add stdin to cache */
258         else
259                 while(--argc >= 0)
260                         enroll(*argv++);
261         execute();
262         exits(0);
263 }
264
265 void
266 fcomp(void)
267 {
268         int     i;
269         Label   *lpt;
270         Rune    *tp;
271         SedCom  *pt, *pt1;
272         static Rune     *p = addspace;
273         static SedCom   **cmpend[DEPTH];        /* stack of {} operations */
274
275         while (rline(linebuf, lbend) >= 0) {
276                 cp = linebuf;
277 comploop:
278                 while(*cp == L' ' || *cp == L'\t')
279                         cp++;
280                 if(*cp == L'\0' || *cp == L'#')
281                         continue;
282                 if(*cp == L';') {
283                         cp++;
284                         goto comploop;
285                 }
286
287                 address(&rep->ad1);
288                 if (rep->ad1.type != A_NONE) {
289                         if (rep->ad1.type == A_LAST) {
290                                 if (!lastre)
291                                         quit("First RE may not be null");
292                                 rep->ad1.type = A_RE;
293                                 rep->ad1.rp = lastre;
294                         }
295                         if(*cp == L',' || *cp == L';') {
296                                 cp++;
297                                 address(&rep->ad2);
298                                 if (rep->ad2.type == A_LAST) {
299                                         rep->ad2.type = A_RE;
300                                         rep->ad2.rp = lastre;
301                                 }
302                         } else
303                                 rep->ad2.type = A_NONE;
304                 }
305                 while(*cp == L' ' || *cp == L'\t')
306                         cp++;
307
308 swit:
309                 switch(*cp++) {
310                 default:
311                         quit("Unrecognized command: %S", linebuf);
312
313                 case '!':
314                         rep->negfl = 1;
315                         goto swit;
316
317                 case '{':
318                         rep->command = BCOM;
319                         rep->negfl = !rep->negfl;
320                         cmpend[depth++] = &rep->lb1;
321                         if(++rep >= pend)
322                                 quit("Too many commands: %S", linebuf);
323                         if(*cp == '\0')
324                                 continue;
325                         goto comploop;
326
327                 case '}':
328                         if(rep->ad1.type != A_NONE)
329                                 quit(AD0MES, linebuf);
330                         if(--depth < 0)
331                                 quit("Too many }'s");
332                         *cmpend[depth] = rep;
333                         if(*cp == 0)
334                                 continue;
335                         goto comploop;
336
337                 case '=':
338                         rep->command = EQCOM;
339                         if(rep->ad2.type != A_NONE)
340                                 quit(AD1MES, linebuf);
341                         break;
342
343                 case ':':
344                         if(rep->ad1.type != A_NONE)
345                                 quit(AD0MES, linebuf);
346
347                         while(*cp == L' ')
348                                 cp++;
349                         tp = lab->uninm;
350                         while (*cp && *cp != L';' && *cp != L' ' &&
351                             *cp != L'\t' && *cp != L'#') {
352                                 *tp++ = *cp++;
353                                 if(tp >= &lab->uninm[8])
354                                         quit(LTL, linebuf);
355                         }
356                         *tp = L'\0';
357
358                         if (*lab->uninm == L'\0')               /* no label? */
359                                 quit(CGMES, L":", linebuf);
360                         if(lpt = search(lab)) {
361                                 if(lpt->address)
362                                         quit("Duplicate labels: %S", linebuf);
363                         } else {
364                                 lab->chain = 0;
365                                 lpt = lab;
366                                 if(++lab >= labend)
367                                         quit("Too many labels: %S", linebuf);
368                         }
369                         lpt->address = rep;
370                         if (*cp == L'#')
371                                 continue;
372                         rep--;                  /* reuse this slot */
373                         break;
374
375                 case 'a':
376                         rep->command = ACOM;
377                         if(rep->ad2.type != A_NONE)
378                                 quit(AD1MES, linebuf);
379                         if(*cp == L'\\')
380                                 cp++;
381                         if(*cp++ != L'\n')
382                                 quit(CGMES, L"a", linebuf);
383                         rep->text = p;
384                         p = stext(p, addend);
385                         break;
386                 case 'c':
387                         rep->command = CCOM;
388                         if(*cp == L'\\')
389                                 cp++;
390                         if(*cp++ != L'\n')
391                                 quit(CGMES, L"c", linebuf);
392                         rep->text = p;
393                         p = stext(p, addend);
394                         break;
395                 case 'i':
396                         rep->command = ICOM;
397                         if(rep->ad2.type != A_NONE)
398                                 quit(AD1MES, linebuf);
399                         if(*cp == L'\\')
400                                 cp++;
401                         if(*cp++ != L'\n')
402                                 quit(CGMES, L"i", linebuf);
403                         rep->text = p;
404                         p = stext(p, addend);
405                         break;
406
407                 case 'g':
408                         rep->command = GCOM;
409                         break;
410
411                 case 'G':
412                         rep->command = CGCOM;
413                         break;
414
415                 case 'h':
416                         rep->command = HCOM;
417                         break;
418
419                 case 'H':
420                         rep->command = CHCOM;
421                         break;
422
423                 case 't':
424                         rep->command = TCOM;
425                         goto jtcommon;
426
427                 case 'b':
428                         rep->command = BCOM;
429 jtcommon:
430                         while(*cp == L' ')
431                                 cp++;
432                         if(*cp == L'\0' || *cp == L';') {
433                                 /* no label; jump to end */
434                                 if(pt = ltab[0].chain) {
435                                         while((pt1 = pt->lb1) != nil)
436                                                 pt = pt1;
437                                         pt->lb1 = rep;
438                                 } else
439                                         ltab[0].chain = rep;
440                                 break;
441                         }
442
443                         /* copy label into lab->uninm */
444                         tp = lab->uninm;
445                         while((*tp = *cp++) != L'\0' && *tp != L';')
446                                 if(++tp >= &lab->uninm[8])
447                                         quit(LTL, linebuf);
448                         cp--;
449                         *tp = L'\0';
450
451                         if (*lab->uninm == L'\0')
452                                 /* shouldn't get here */
453                                 quit(CGMES, L"b or t", linebuf);
454                         if((lpt = search(lab)) != nil) {
455                                 if(lpt->address)
456                                         rep->lb1 = lpt->address;
457                                 else {
458                                         for(pt = lpt->chain; pt != nil &&
459                                             (pt1 = pt->lb1) != nil; pt = pt1)
460                                                 ;
461                                         if (pt)
462                                                 pt->lb1 = rep;
463                                 }
464                         } else {                        /* add new label */
465                                 lab->chain = rep;
466                                 lab->address = 0;
467                                 if(++lab >= labend)
468                                         quit("Too many labels: %S", linebuf);
469                         }
470                         break;
471
472                 case 'n':
473                         rep->command = NCOM;
474                         break;
475
476                 case 'N':
477                         rep->command = CNCOM;
478                         break;
479
480                 case 'p':
481                         rep->command = PCOM;
482                         break;
483
484                 case 'P':
485                         rep->command = CPCOM;
486                         break;
487
488                 case 'r':
489                         rep->command = RCOM;
490                         if(rep->ad2.type != A_NONE)
491                                 quit(AD1MES, linebuf);
492                         if(*cp++ != L' ')
493                                 quit(CGMES, L"r", linebuf);
494                         rep->text = p;
495                         p = stext(p, addend);
496                         break;
497
498                 case 'd':
499                         rep->command = DCOM;
500                         break;
501
502                 case 'D':
503                         rep->command = CDCOM;
504                         rep->lb1 = pspace;
505                         break;
506
507                 case 'q':
508                         rep->command = QCOM;
509                         if(rep->ad2.type != A_NONE)
510                                 quit(AD1MES, linebuf);
511                         break;
512
513                 case 'l':
514                         rep->command = LCOM;
515                         break;
516
517                 case 's':
518                         rep->command = SCOM;
519                         seof = *cp++;
520                         if ((rep->re1 = compile()) == 0) {
521                                 if(!lastre)
522                                         quit("First RE may not be null.");
523                                 rep->re1 = lastre;
524                         }
525                         rep->rhs = p;
526                         if((p = compsub(p, addend)) == 0)
527                                 quit(CGMES, L"s", linebuf);
528                         if(*cp == L'g') {
529                                 cp++;
530                                 rep->gfl++;
531                         } else if(gflag)
532                                 rep->gfl++;
533
534                         if(*cp == L'p') {
535                                 cp++;
536                                 rep->pfl = 1;
537                         }
538
539                         if(*cp == L'P') {
540                                 cp++;
541                                 rep->pfl = 2;
542                         }
543
544                         if(*cp == L'w') {
545                                 cp++;
546                                 if(*cp++ !=  L' ')
547                                         quit(CGMES, L"s", linebuf);
548                                 text(fname[nfiles]);
549                                 for(i = nfiles - 1; i >= 0; i--)
550                                         if(cmp(fname[nfiles], fname[i]) == 0) {
551                                                 rep->fcode = fcode[i];
552                                                 goto done;
553                                         }
554                                 if(nfiles >= MAXFILES)
555                                         quit("Too many files in w commands 1");
556                                 rep->fcode = open_file(fname[nfiles]);
557                         }
558                         break;
559
560                 case 'w':
561                         rep->command = WCOM;
562                         if(*cp++ != L' ')
563                                 quit(CGMES, L"w", linebuf);
564                         text(fname[nfiles]);
565                         for(i = nfiles - 1; i >= 0; i--)
566                                 if(cmp(fname[nfiles], fname[i]) == 0) {
567                                         rep->fcode = fcode[i];
568                                         goto done;
569                                 }
570                         if(nfiles >= MAXFILES){
571                                 fprint(2, "sed: Too many files in w commands 2 \n");
572                                 fprint(2, "nfiles = %d; MAXF = %d\n",
573                                         nfiles, MAXFILES);
574                                 errexit();
575                         }
576                         rep->fcode = open_file(fname[nfiles]);
577                         break;
578
579                 case 'x':
580                         rep->command = XCOM;
581                         break;
582
583                 case 'y':
584                         rep->command = YCOM;
585                         seof = *cp++;
586                         if (ycomp(rep) == 0)
587                                 quit(CGMES, L"y", linebuf);
588                         break;
589
590                 }
591 done:
592                 if(++rep >= pend)
593                         quit("Too many commands, last: %S", linebuf);
594                 if(*cp++ != L'\0') {
595                         if(cp[-1] == L';')
596                                 goto comploop;
597                         quit(CGMES, cp - 1, linebuf);
598                 }
599         }
600 }
601
602 Biobuf *
603 open_file(char *name)
604 {
605         int fd;
606         Biobuf *bp;
607
608         if ((bp = malloc(sizeof(Biobuf))) == 0)
609                 quit("Out of memory");
610         if ((fd = open(name, OWRITE)) < 0 &&
611             (fd = create(name, OWRITE, 0666)) < 0)
612                 quit("Cannot create %s", name);
613         Binit(bp, fd, OWRITE);
614         Blethal(bp, nil);
615         Bseek(bp, 0, 2);
616         fcode[nfiles++] = bp;
617         return bp;
618 }
619
620 Rune *
621 compsub(Rune *rhs, Rune *end)
622 {
623         Rune r;
624
625         while ((r = *cp++) != '\0') {
626                 if(r == '\\') {
627                         if (rhs < end)
628                                 *rhs++ = 0xFFFF;
629                         else
630                                 return 0;
631                         r = *cp++;
632                         if(r == 'n')
633                                 r = '\n';
634                 } else {
635                         if(r == seof) {
636                                 if (rhs < end)
637                                         *rhs++ = '\0';
638                                 else
639                                         return 0;
640                                 return rhs;
641                         }
642                 }
643                 if (rhs < end)
644                         *rhs++ = r;
645                 else
646                         return 0;
647         }
648         return 0;
649 }
650
651 Reprog *
652 compile(void)
653 {
654         Rune c;
655         char *ep;
656         char expbuf[512];
657
658         if((c = *cp++) == seof)         /* L'//' */
659                 return 0;
660         ep = expbuf;
661         do {
662                 if (c == L'\0' || c == L'\n')
663                         quit(TMMES, linebuf);
664                 if (c == L'\\') {
665                         if (ep >= expbuf+sizeof(expbuf))
666                                 quit(TMMES, linebuf);
667                         ep += runetochar(ep, &c);
668                         if ((c = *cp++) == L'n')
669                                 c = L'\n';
670                 }
671                 if (ep >= expbuf + sizeof(expbuf))
672                         quit(TMMES, linebuf);
673                 ep += runetochar(ep, &c);
674         } while ((c = *cp++) != seof);
675         *ep = 0;
676         return lastre = regcomp(expbuf);
677 }
678
/*
 * regerror -- report a bad regular expression and exit through quit().
 * The message argument is ignored; the current script line (linebuf)
 * is reported instead.  Nothing in the visible part of this file calls
 * it directly; presumably it is the error hook the regexp library
 * invokes -- confirm against regexp(2).
 */
679 void
680 regerror(char *s)
681 {
682         USED(s);
683         quit(CGMES, L"r.e.-using", linebuf);
684 }
685
686 void
687 newfile(enum PTYPE type, char *name)
688 {
689         if (type == P_ARG)
690                 prog.curr = name;
691         else {
692                 if ((prog.bp = Bopen(name, OREAD)) == 0)
693                         quit("Cannot open pattern-file: %s\n", name);
694                 Blethal(prog.bp, nil);
695         }
696         prog.type = type;
697 }
698
699 int
700 rline(Rune *buf, Rune *end)
701 {
702         long c;
703         Rune r;
704
705         while ((c = getrune()) >= 0) {
706                 r = c;
707                 if (r == '\\') {
708                         if (buf <= end)
709                                 *buf++ = r;
710                         if ((c = getrune()) < 0)
711                                 break;
712                         r = c;
713                 } else if (r == '\n') {
714                         *buf = '\0';
715                         return 1;
716                 }
717                 if (buf <= end)
718                         *buf++ = r;
719         }
720         *buf = '\0';
721         return -1;
722 }
723
724 long
725 getrune(void)
726 {
727         long c;
728         Rune r;
729         char *p;
730
731         if (prog.type == P_ARG) {
732                 if ((p = prog.curr) != 0) {
733                         if (*p) {
734                                 prog.curr += chartorune(&r, p);
735                                 c = r;
736                         } else {
737                                 c = '\n';       /* fake an end-of-line */
738                                 prog.curr = 0;
739                         }
740                 } else
741                         c = -1;
742         } else if ((c = Bgetrune(prog.bp)) < 0)
743                 Bterm(prog.bp);
744         return c;
745 }
746
747 void
748 address(Addr *ap)
749 {
750         int c;
751         long lno;
752
753         if((c = *cp++) == '$')
754                 ap->type = A_DOL;
755         else if(c == '/') {
756                 seof = c;
757                 if (ap->rp = compile())
758                         ap->type = A_RE;
759                 else
760                         ap->type = A_LAST;
761         }
762         else if (c >= '0' && c <= '9') {
763                 lno = c - '0';
764                 while ((c = *cp) >= '0' && c <= '9')
765                         lno = lno*10 + *cp++ - '0';
766                 if(!lno)
767                         quit("line number 0 is illegal",0);
768                 ap->type = A_LINE;
769                 ap->line = lno;
770         }
771         else {
772                 cp--;
773                 ap->type = A_NONE;
774         }
775 }
776
/*
 * cmp -- compare two NUL-terminated byte strings.
 * Returns 0 when they are identical, 1 otherwise (no ordering).
 * The return type is now spelled out to match the prototype instead of
 * relying on implicit int.
 */
int
cmp(char *a, char *b)		/* compare characters */
{
	while(*a == *b++) {
		if (*a == '\0')
			return 0;
		a++;
	}
	return 1;
}
786 rcmp(Rune *a, Rune *b)          /* compare runes */
787 {
788         while(*a == *b++)
789                 if (*a == '\0')
790                         return 0;
791                 else
792                         a++;
793         return 1;
794 }
795
796 char *
797 text(char *p)           /* extract character string */
798 {
799         Rune r;
800
801         while(*cp == ' ' || *cp == '\t')
802                 cp++;
803         while (*cp) {
804                 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
805                         break;
806                 if (r == '\n')
807                         while (*cp == ' ' || *cp == '\t')
808                                 cp++;
809                 p += runetochar(p, &r);
810         }
811         *p++ = '\0';
812         return p;
813 }
814
815 Rune *
816 stext(Rune *p, Rune *end)               /* extract rune string */
817 {
818         while(*cp == L' ' || *cp == L'\t')
819                 cp++;
820         while (*cp) {
821                 if (*cp == L'\\' && *++cp == L'\0')
822                         break;
823                 if (p >= end-1)
824                         quit(TMMES, linebuf);
825                 if ((*p++ = *cp++) == L'\n')
826                         while(*cp == L' ' || *cp == L'\t')
827                                 cp++;
828         }
829         *p++ = 0;
830         return p;
831 }
832
833
834 Label *
835 search(Label *ptr)
836 {
837         Label   *rp;
838
839         for (rp = ltab; rp < ptr; rp++)
840                 if(rcmp(rp->uninm, ptr->uninm) == 0)
841                         return(rp);
842         return(0);
843 }
844
845 void
846 dechain(void)
847 {
848         Label   *lptr;
849         SedCom  *rptr, *trptr;
850
851         for(lptr = ltab; lptr < lab; lptr++) {
852                 if(lptr->address == 0)
853                         quit("Undefined label: %S", lptr->uninm);
854                 if(lptr->chain) {
855                         rptr = lptr->chain;
856                         while((trptr = rptr->lb1) != nil) {
857                                 rptr->lb1 = lptr->address;
858                                 rptr = trptr;
859                         }
860                         rptr->lb1 = lptr->address;
861                 }
862         }
863 }
864
865 int
866 ycomp(SedCom *r)
867 {
868         int i;
869         Rune *rp, *sp, *tsp;
870         Rune c, highc;
871
872         highc = 0;
873         for(tsp = cp; *tsp != seof; tsp++) {
874                 if(*tsp == L'\\')
875                         tsp++;
876                 if(*tsp == L'\n' || *tsp == L'\0')
877                         return 0;
878                 if (*tsp > highc)
879                         highc = *tsp;
880         }
881         tsp++;
882         if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
883                 quit("Out of memory");
884         *rp++ = highc;                          /* save upper bound */
885         for (i = 0; i <= highc; i++)
886                 rp[i] = i;
887         sp = cp;
888         while((c = *sp++) != seof) {
889                 if(c == L'\\' && *sp == L'n') {
890                         sp++;
891                         c = L'\n';
892                 }
893                 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
894                         rp[c] = L'\n';
895                         tsp++;
896                 }
897                 if(rp[c] == seof || rp[c] == L'\0') {
898                         free(r->re1);
899                         r->re1 = nil;
900                         return 0;
901                 }
902         }
903         if(*tsp != seof) {
904                 free(r->re1);
905                 r->re1 = nil;
906                 return 0;
907         }
908         cp = tsp+1;
909         return 1;
910 }
911
912 void
913 execute(void)
914 {
915         SedCom  *ipc;
916
917         while (spend = gline(linebuf)){
918                 for(ipc = pspace; ipc->command; ) {
919                         if (!executable(ipc)) {
920                                 ipc++;
921                                 continue;
922                         }
923                         command(ipc);
924
925                         if(delflag)
926                                 break;
927                         if(jflag) {
928                                 jflag = 0;
929                                 if((ipc = ipc->lb1) == 0)
930                                         break;
931                         } else
932                                 ipc++;
933                 }
934                 if(!nflag && !delflag)
935                         putline(&fout, linebuf, spend - linebuf);
936                 if(aptr > abuf)
937                         arout();
938                 delflag = 0;
939         }
940 }
941
942 /* determine if a statement should be applied to an input line */
943 int
944 executable(SedCom *ipc)
945 {
946         if (ipc->active) {      /* Addr1 satisfied - accept until Addr2 */
947                 if (ipc->active == 1)           /* Second line */
948                         ipc->active = 2;
949                 switch(ipc->ad2.type) {
950                 case A_NONE:            /* No second addr; use first */
951                         ipc->active = 0;
952                         break;
953                 case A_DOL:             /* Accept everything */
954                         return !ipc->negfl;
955                 case A_LINE:            /* Line at end of range? */
956                         if (lnum <= ipc->ad2.line) {
957                                 if (ipc->ad2.line == lnum)
958                                         ipc->active = 0;
959                                 return !ipc->negfl;
960                         }
961                         ipc->active = 0;        /* out of range */
962                         return ipc->negfl;
963                 case A_RE:              /* Check for matching R.E. */
964                         if (match(ipc->ad2.rp, linebuf))
965                                 ipc->active = 0;
966                         return !ipc->negfl;
967                 default:
968                         quit("Internal error");
969                 }
970         }
971         switch (ipc->ad1.type) {        /* Check first address */
972         case A_NONE:                    /* Everything matches */
973                 return !ipc->negfl;
974         case A_DOL:                     /* Only last line */
975                 if (dolflag)
976                         return !ipc->negfl;
977                 break;
978         case A_LINE:                    /* Check line number */
979                 if (ipc->ad1.line == lnum) {
980                         ipc->active = 1;        /* In range */
981                         return !ipc->negfl;
982                 }
983                 break;
984         case A_RE:                      /* Check R.E. */
985                 if (match(ipc->ad1.rp, linebuf)) {
986                         ipc->active = 1;        /* In range */
987                         return !ipc->negfl;
988                 }
989                 break;
990         default:
991                 quit("Internal error");
992         }
993         return ipc->negfl;
994 }
995
996 int
997 match(Reprog *pattern, Rune *buf)
998 {
999         if (!pattern)
1000                 return 0;
1001         subexp[0].rsp = buf;
1002         subexp[0].ep = 0;
1003         if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
1004                 loc1 = subexp[0].rsp;
1005                 loc2 = subexp[0].rep;
1006                 return 1;
1007         }
1008         loc1 = loc2 = 0;
1009         return 0;
1010 }
1011
1012 int
1013 substitute(SedCom *ipc)
1014 {
1015         int len;
1016
1017         if(!match(ipc->re1, linebuf))
1018                 return 0;
1019
1020         /*
1021          * we have at least one match.  some patterns, e.g. '$' or '^', can
1022          * produce 0-length matches, so during a global substitute we must
1023          * bump to the character after a 0-length match to keep from looping.
1024          */
1025         sflag = 1;
1026         if(ipc->gfl == 0)                       /* single substitution */
1027                 dosub(ipc->rhs);
1028         else
1029                 do{                             /* global substitution */
1030                         len = loc2 - loc1;      /* length of match */
1031                         dosub(ipc->rhs);        /* dosub moves loc2 */
1032                         if(*loc2 == 0)          /* end of string */
1033                                 break;
1034                         if(len == 0)            /* zero-length R.E. match */
1035                                 loc2++;         /* bump over 0-length match */
1036                         if(*loc2 == 0)          /* end of string */
1037                                 break;
1038                 } while(match(ipc->re1, loc2));
1039         return 1;
1040 }
1041
1042 void
1043 dosub(Rune *rhsbuf)
1044 {
1045         int c, n;
1046         Rune *lp, *sp, *rp;
1047
1048         lp = linebuf;
1049         sp = genbuf;
1050         rp = rhsbuf;
1051         while (lp < loc1)
1052                 *sp++ = *lp++;
1053         while(c = *rp++) {
1054                 if (c == '&') {
1055                         sp = place(sp, loc1, loc2);
1056                         continue;
1057                 }
1058                 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1059                         n = c-'0';
1060                         if (subexp[n].rsp && subexp[n].rep) {
1061                                 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1062                                 continue;
1063                         }
1064                         else {
1065                                 fprint(2, "sed: Invalid back reference \\%d\n",n);
1066                                 errexit();
1067                         }
1068                 }
1069                 *sp++ = c;
1070                 if (sp >= &genbuf[LBSIZE])
1071                         fprint(2, "sed: Output line too long.\n");
1072         }
1073         lp = loc2;
1074         loc2 = sp - genbuf + linebuf;
1075         while (*sp++ = *lp++)
1076                 if (sp >= &genbuf[LBSIZE])
1077                         fprint(2, "sed: Output line too long.\n");
1078         lp = linebuf;
1079         sp = genbuf;
1080         while (*lp++ = *sp++)
1081                 ;
1082         spend = lp - 1;
1083 }
1084
1085 Rune *
1086 place(Rune *sp, Rune *l1, Rune *l2)
1087 {
1088         while (l1 < l2) {
1089                 *sp++ = *l1++;
1090                 if (sp >= &genbuf[LBSIZE])
1091                         fprint(2, "sed: Output line too long.\n");
1092         }
1093         return sp;
1094 }
1095
/*
 * trans -- return a printable escape for the character c.
 *
 * Backspace, newline, carriage return, tab and backslash map to the
 * two-character escapes \b \n \r \t \\.  Anything else is rendered as
 * \x followed by the low 16 bits of c as four lower-case hex digits.
 * The hex form lives in a static buffer that is overwritten by the
 * next call.
 */
char *
trans(int c)
{
	static char buf[] = "\\x0000";
	static char hex[] = "0123456789abcdef";
	int pos, shift;

	if(c == '\b')
		return "\\b";
	if(c == '\n')
		return "\\n";
	if(c == '\r')
		return "\\r";
	if(c == '\t')
		return "\\t";
	if(c == '\\')
		return "\\\\";

	/* fill buf[2..5] from the most significant nibble down */
	for(pos = 2, shift = 12; shift >= 0; pos++, shift -= 4)
		buf[pos] = hex[(c>>shift)&0xF];
	return buf;
}
1120
1121 void
1122 command(SedCom *ipc)
1123 {
1124         int i, c;
1125         char *ucp;
1126         Rune *execp, *p1, *p2, *rp;
1127
1128         switch(ipc->command) {
1129         case ACOM:
1130                 *aptr++ = ipc;
1131                 if(aptr >= abuf+MAXADDS)
1132                         quit("sed: Too many appends after line %ld\n",
1133                                 (char *)lnum);
1134                 *aptr = 0;
1135                 break;
1136         case CCOM:
1137                 delflag = 1;
1138                 if(ipc->active == 1) {
1139                         for(rp = ipc->text; *rp; rp++)
1140                                 Bputrune(&fout, *rp);
1141                         Bputc(&fout, '\n');
1142                 }
1143                 break;
1144         case DCOM:
1145                 delflag++;
1146                 break;
1147         case CDCOM:
1148                 p1 = p2 = linebuf;
1149                 while(*p1 != '\n') {
1150                         if(*p1++ == 0) {
1151                                 delflag++;
1152                                 return;
1153                         }
1154                 }
1155                 p1++;
1156                 while(*p2++ = *p1++)
1157                         ;
1158                 spend = p2 - 1;
1159                 jflag++;
1160                 break;
1161         case EQCOM:
1162                 Bprint(&fout, "%ld\n", lnum);
1163                 break;
1164         case GCOM:
1165                 p1 = linebuf;
1166                 p2 = holdsp;
1167                 while(*p1++ = *p2++)
1168                         ;
1169                 spend = p1 - 1;
1170                 break;
1171         case CGCOM:
1172                 *spend++ = '\n';
1173                 p1 = spend;
1174                 p2 = holdsp;
1175                 while(*p1++ = *p2++)
1176                         if(p1 >= lbend)
1177                                 break;
1178                 spend = p1 - 1;
1179                 break;
1180         case HCOM:
1181                 p1 = holdsp;
1182                 p2 = linebuf;
1183                 while(*p1++ = *p2++);
1184                 hspend = p1 - 1;
1185                 break;
1186         case CHCOM:
1187                 *hspend++ = '\n';
1188                 p1 = hspend;
1189                 p2 = linebuf;
1190                 while(*p1++ = *p2++)
1191                         if(p1 >= hend)
1192                                 break;
1193                 hspend = p1 - 1;
1194                 break;
1195         case ICOM:
1196                 for(rp = ipc->text; *rp; rp++)
1197                         Bputrune(&fout, *rp);
1198                 Bputc(&fout, '\n');
1199                 break;
1200         case BCOM:
1201                 jflag = 1;
1202                 break;
1203         case LCOM:
1204                 c = 0;
1205                 for (i = 0, rp = linebuf; *rp; rp++) {
1206                         c = *rp;
1207                         if(c >= 0x20 && c < 0x7F && c != '\\') {
1208                                 Bputc(&fout, c);
1209                                 if(i++ > 71) {
1210                                         Bprint(&fout, "\\\n");
1211                                         i = 0;
1212                                 }
1213                         } else {
1214                                 for (ucp = trans(*rp); *ucp; ucp++){
1215                                         c = *ucp;
1216                                         Bputc(&fout, c);
1217                                         if(i++ > 71) {
1218                                                 Bprint(&fout, "\\\n");
1219                                                 i = 0;
1220                                         }
1221                                 }
1222                         }
1223                 }
1224                 if(c == ' ')
1225                         Bprint(&fout, "\\n");
1226                 Bputc(&fout, '\n');
1227                 break;
1228         case NCOM:
1229                 if(!nflag)
1230                         putline(&fout, linebuf, spend-linebuf);
1231
1232                 if(aptr > abuf)
1233                         arout();
1234                 if((execp = gline(linebuf)) == 0) {
1235                         delflag = 1;
1236                         break;
1237                 }
1238                 spend = execp;
1239                 break;
1240         case CNCOM:
1241                 if(aptr > abuf)
1242                         arout();
1243                 *spend++ = '\n';
1244                 if((execp = gline(spend)) == 0) {
1245                         delflag = 1;
1246                         break;
1247                 }
1248                 spend = execp;
1249                 break;
1250         case PCOM:
1251                 putline(&fout, linebuf, spend-linebuf);
1252                 break;
1253         case CPCOM:
1254 cpcom:
1255                 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1256                         Bputc(&fout, *rp);
1257                 Bputc(&fout, '\n');
1258                 break;
1259         case QCOM:
1260                 if(!nflag)
1261                         putline(&fout, linebuf, spend-linebuf);
1262                 if(aptr > abuf)
1263                         arout();
1264                 exits(0);
1265         case RCOM:
1266                 *aptr++ = ipc;
1267                 if(aptr >= &abuf[MAXADDS])
1268                         quit("sed: Too many reads after line %ld\n",
1269                                 (char *)lnum);
1270                 *aptr = 0;
1271                 break;
1272         case SCOM:
1273                 i = substitute(ipc);
1274                 if(i && ipc->pfl)
1275                         if(ipc->pfl == 1)
1276                                 putline(&fout, linebuf, spend-linebuf);
1277                         else
1278                                 goto cpcom;
1279                 if(i && ipc->fcode)
1280                         goto wcom;
1281                 break;
1282
1283         case TCOM:
1284                 if(sflag) {
1285                         sflag = 0;
1286                         jflag = 1;
1287                 }
1288                 break;
1289
1290         case WCOM:
1291 wcom:
1292                 putline(ipc->fcode,linebuf, spend - linebuf);
1293                 break;
1294         case XCOM:
1295                 p1 = linebuf;
1296                 p2 = genbuf;
1297                 while(*p2++ = *p1++)
1298                         ;
1299                 p1 = holdsp;
1300                 p2 = linebuf;
1301                 while(*p2++ = *p1++)
1302                         ;
1303                 spend = p2 - 1;
1304                 p1 = genbuf;
1305                 p2 = holdsp;
1306                 while(*p2++ = *p1++)
1307                         ;
1308                 hspend = p2 - 1;
1309                 break;
1310         case YCOM:
1311                 p1 = linebuf;
1312                 p2 = ipc->text;
1313                 for (i = *p2++; *p1; p1++)
1314                         if (*p1 <= i)
1315                                 *p1 = p2[*p1];
1316                 break;
1317         }
1318 }
1319
1320 void
1321 putline(Biobuf *bp, Rune *buf, int n)
1322 {
1323         while (n--)
1324                 Bputrune(bp, *buf++);
1325         Bputc(bp, '\n');
1326 }
1327 ecmp(Rune *a, Rune *b, int count)
1328 {
1329         while(count--)
1330                 if(*a++ != *b++)
1331                         return 0;
1332         return 1;
1333 }
1334
1335 void
1336 arout(void)
1337 {
1338         int     c;
1339         char    *s;
1340         char    buf[128];
1341         Rune    *p1;
1342         Biobuf  *fi;
1343
1344         for (aptr = abuf; *aptr; aptr++) {
1345                 if((*aptr)->command == ACOM) {
1346                         for(p1 = (*aptr)->text; *p1; p1++ )
1347                                 Bputrune(&fout, *p1);
1348                         Bputc(&fout, '\n');
1349                 } else {
1350                         for(s = buf, p1 = (*aptr)->text; *p1; p1++)
1351                                 s += runetochar(s, p1);
1352                         *s = '\0';
1353                         if((fi = Bopen(buf, OREAD)) == 0)
1354                                 continue;
1355                         Blethal(fi, nil);
1356                         while((c = Bgetc(fi)) >= 0)
1357                                 Bputc(&fout, c);
1358                         Bterm(fi);
1359                 }
1360         }
1361         aptr = abuf;
1362         *aptr = 0;
1363 }
1364
/*
 * errexit -- terminate the program with the exit status string
 * "error".
 */
void
errexit(void)
{
	exits("error");
}
1370
1371 void
1372 quit(char *fmt, ...)
1373 {
1374         char *p, *ep;
1375         char msg[256];
1376         va_list arg;
1377
1378         ep = msg + sizeof msg;
1379         p = seprint(msg, ep, "sed: ");
1380         va_start(arg, fmt);
1381         p = vseprint(p, ep, fmt, arg);
1382         va_end(arg);
1383         p = seprint(p, ep, "\n");
1384         write(2, msg, p - msg);
1385         errexit();
1386 }
1387
/*
 * gline -- read the next input line into the buffer at addr.
 *
 * Returns a pointer to the terminating zero rune stored after the
 * line (the newline itself is not stored), or 0 when no input is
 * left.  Each call that gets past the initial open check clears
 * sflag and increments lnum.  dolflag is set when the returned line
 * is found to be the last one: end of the current stream with no
 * further files queued (fhead == 0).
 */
Rune *
gline(Rune *addr)
{
	long c;
	Rune *p;
	/* rune read ahead after a newline; 0 means none, negative means end of stream */
	static long peekc = 0;

	/* no stream open yet: open the first one, or report end of input */
	if (f == 0 && opendata() < 0)
		return 0;
	sflag = 0;
	lnum++;
/*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
	do {
		p = addr;
		/* start with the rune read ahead by the previous call, if any */
		for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
			if (c == '\n') {
				/* look one rune ahead to detect the last line */
				if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
					dolflag = 1;
				*p = '\0';
				return p;
			}
			/* zero runes and runes that would land at or past lbend are dropped */
			if (c && p < lbend)
				*p++ = c;
		}
		/* return partial final line, adding implicit newline */
		if(p != addr) {
			*p = '\0';
			/* make the next call see end of stream without reading */
			peekc = -1;
			if (fhead == 0)
				dolflag = 1;
			return p;
		}
		/* stream exhausted with nothing read: close it and try the next */
		peekc = 0;
		Bterm(f);
	} while (opendata() > 0);		/* Switch to next stream */
	f = 0;
	return 0;
}
1426
1427 /*
1428  * Data file input section - the intent is to transparently
1429  *      catenate all data input streams.
1430  */
1431 void
1432 enroll(char *filename)          /* Add a file to the input file cache */
1433 {
1434         FileCache *fp;
1435
1436         if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1437                 quit("Out of memory");
1438         if (ftail == nil)
1439                 fhead = fp;
1440         else
1441                 ftail->next = fp;
1442         ftail = fp;
1443         fp->next = nil;
1444         fp->name = filename;            /* 0 => stdin */
1445 }
1446
1447 int
1448 opendata(void)
1449 {
1450         if (fhead == nil)
1451                 return -1;
1452         if (fhead->name) {
1453                 if ((f = Bopen(fhead->name, OREAD)) == nil)
1454                         quit("Can't open %s", fhead->name);
1455         } else {
1456                 Binit(&stdin, 0, OREAD);
1457                 f = &stdin;
1458         }
1459         Blethal(f, nil);
1460         fhead = fhead->next;
1461         return 1;
1462 }