6 * Deroff command -- strip troff, eqn, and tbl sequences from
7 * a file. Accepts several flag arguments: -w causes output of one word per line
8 * rather than in the original format.
9 * -mm (or -ms) causes the corresponding macros to be interpreted
10 * so that just sentences are output
11 * -ml also gets rid of lists.
12 * -i causes deroff to ignore .so and .nx commands.
13 * Deroff follows .so and .nx commands, removes contents of macro
14 * definitions, equations (both .EQ ... .EN and $...$),
15 * Tbl command sequences, and Troff backslash constructions.
17 * All input is through the C macro; the most recently read character is in c.
21 #define C ((c = Bgetrune(infile)) < 0?\
23 ((c == ldelim) && (filesp == files)?\
29 #define C1 ((c = Bgetrune(infile)) == Beof?\
36 /* lose those macros! */
40 #define SKIP while(C != '\n')
41 #define SKIP1 while(C1 != '\n')
42 #define SKIP_TO_COM SKIP;\
45 while(C != '.' || pc != '\n' || C > 'Z')\
56 #define EXTENDED -1 /* All runes above 0x7F */
66 int underscoreflag = NO;
75 #define MAX_ASCII 0X80
77 char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
94 char* devnull = "/dev/null";
103 void fatal(char *s, char *p);
106 void putmac(Rune *rp, int vconst);
107 void regline(int macline, int vconst);
114 void sdis(char a1, char a2);
117 char* copys(char *s);
124 c = Bgetrune(infile);
127 if(c == ldelim && filesp == files)
137 c = Bgetrune(infile);
146 main(int argc, char *av[])
152 Binit(&bout, 1, OWRITE);
159 underscoreflag = YES;
166 case 'm': mac = MM; break;
167 case 's': mac = MS; break;
168 case 'l': disp = 1; break;
181 infile = opn(*argv++);
183 infile = malloc(sizeof(Biobuf));
184 Binit(infile, 0, OREAD);
189 for(i='a'; i<='z' ; ++i)
191 for(i='A'; i<='Z'; ++i)
193 for(i='0'; i<='9'; ++i)
226 while ((fd = Bopen(p, OREAD)) == 0) {
227 if(msflag || p == devnull)
228 fatal("Cannot open file %s - quitting\n", p);
230 fprint(2, "Deroff: Cannot open file %s - continuing\n", p);
241 if(Bfildes(infile) != 0)
247 infile = opn(*argv++);
265 static struct chain *namechain= 0;
269 for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C)
270 p += runetochar(p, &r);
274 if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")
275 || !strcmp(fname, "/sys/lib/tmac/tmac.s")) {
279 dir = dirstat(fname);
280 if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {
287 * see if this name has already been used
290 for(q = namechain; q; q = q->nextp)
291 if( !strcmp(fname, q->datap)) {
295 q = (struct chain*)malloc(sizeof(struct chain));
296 q->nextp = namechain;
297 q->datap = copys(fname);
304 fprint(2,"usage: deroff [-nw_pi] [-m (m s l)] [file ...] \n");
309 fatal(char *s, char *p)
311 fprint(2, "deroff: ");
322 if(C == '.' || c == '\'')
330 regline(int macline, int vconst)
338 if(c == '%') /* no blank for hyphenation char */
343 if(intable && c=='T') {
345 if(c=='{' || c=='}') {
350 if(msflag == 1 && eqnflag == 1) {
365 Bprint(&bout, "%S\n", line);
370 putmac(Rune *rp, int vconst)
379 while(*rp == ' ' || *rp == '\t')
380 Bputrune(&bout, *rp++);
381 for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++)
385 if(t > rp+vconst && charclass(*rp) == LETTER
386 && charclass(rp[1]) == LETTER) {
391 Bputrune(&bout, *rp++);
395 if(found && charclass(*rp) == PUNCT && rp[1] == '\0')
396 Bputrune(&bout, *rp++);
403 if(msflag && charclass(last) == PUNCT)
404 Bprint(&bout, " %C\n", last);
408 * break into words for -w option
419 * skip initial specials, ampersands, and apostrophes
421 while((i = charclass(*p1)) != EXTENDED && i < DIGIT)
425 for(p = p1; (i = charclass(*p)) != SPECIAL || (underscoreflag && *p=='_'); p++)
426 if(i == LETTER || (underscoreflag && *p == '_'))
429 * MDM definition of word
433 * delete trailing ampersands and apostrophes
435 while(*--p == '\'' || *p == '&'
436 || charclass(*p) == PUNCT)
439 Bputrune(&bout, *p1++);
451 while(C==' ' || c=='\t')
457 if(c1=='.' && c2!='.')
459 if(msflag && c1 == '['){
465 if(c1 == '\\' && c2 == '\"')
468 if (filesp==files && c1=='E' && c2=='Q')
471 if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {
478 if(c1=='T' && c2=='E')
481 ((c1 == 'd' && c2 == 'e') ||
482 (c1 == 'i' && c2 == 'g') ||
483 (c1 == 'a' && c2 == 'm')))
486 if(c1=='s' && c2=='o') {
492 if(infile = opn(fname))
494 else infile = *filesp;
499 if(c1=='n' && c2=='x')
506 if(Bfildes(infile) != 0)
508 infile = *filesp = opn(fname);
511 if(c1 == 't' && c2 == 'm')
514 if(c1=='h' && c2=='w')
517 if(msflag && c1 == 'T' && c2 == 'L') {
522 if(msflag && c1=='N' && c2 == 'R')
525 if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
532 if(msflag && c1=='F' && c2=='S') {
537 if(msflag && (c1=='S' || c1=='N') && c2=='H') {
541 if(c1 == 'U' && c2 == 'X') {
543 Bprint(&bout, "UNIX\n");
545 Bprint(&bout, "UNIX ");
547 if(msflag && c1=='O' && c2=='K') {
551 if(msflag && c1=='N' && c2=='D')
554 if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
557 if(msflag && mac==MM && c2=='L') {
565 if(!msflag && c1=='P' && c2=='S') {
568 if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {
571 if(msflag && (c1 == 'K' && c2 == 'F')) {
574 if(msflag && c1=='n' && c2=='f')
577 if(msflag && c1=='c' && c2=='e')
580 if(c1=='.' && c2=='.') {
589 if(c1 <= 'Z' && msflag)
606 } while(C1 != '.' || C1 != '.' || C1 == '.');
616 sdis(char a1, char a2)
642 if((c2=C1) == '\n') {
643 if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
647 if(c1==a1 && c2 == a2) {
658 if(a1 == 'L' && c2 == 'L') {
662 if(a1 == 'D' && c1 == 'E' && c2 == 'Q') {
667 if((mac == MS && c2 == 'P') ||
668 (mac == MM && c1 == 'H' && c2 == 'U')){
694 if(c != 'T' || C != 'E') {
697 while(C != '.' || pc != '\n' || C != 'T' || C != 'E')
714 if(C1 == '.' || c == '\'') {
715 while(C1==' ' || c=='\t')
717 if(c=='E' && C1=='N') {
731 if(C1=='e' && C1=='l')
732 if(C1=='i' && C1=='m') {
735 if((c1=c)=='\n' || (c2=C1)=='\n' ||
736 (c1=='o' && c2=='f' && C1=='f')) {
748 if(chars[c] == PUNCT)
758 * skip over a complete backslash construction
776 while(C1>='0' && c<='9')
807 C1; /* discard argument number */
818 if((bdelim=C1) == '\n')
820 while(C1!='\n' && c!=bdelim)
838 if((t0 = t = malloc((strlen(s)+1))) == 0)
839 fatal("Cannot allocate memory", (char*)0);
850 while (C != L'\n' && !(L'0' <= c && c <= L'9'))
853 for (n = c-L'0';'0' <= C && c <= L'9';)
870 if(c == 'P' || C == 'P') {
901 if(charclass(c2) == PUNCT)
902 Bprint(&bout, " %C",c2);
925 if(C1 == '.' && c1 == '\n') {
926 if(C1 != 'P' || C1 != 'E') {
948 if(c == '\n' && p1 != line) {
953 Bprint(&bout, "%S\n\n", line);
965 case 0x2013: case 0x2014: /* en dash, em dash */