7 Wid = 20, /* tmac.anhtml sets page width to 20" so we can recognize .nf text */
11 typedef struct Troffchar Troffchar;
12 typedef struct Htmlchar Htmlchar;
13 typedef struct Font Font;
14 typedef struct HTMLfont HTMLfont;
17 * a Char is >= 32 bits. low 16 bits are the rune. higher are attributes.
18 * must be able to hold a pointer.
29 Anchor = 26, /* must be last */
32 enum /* magic emissions */
38 int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW };
69 /* R must be first; it's the default representation for fonts we don't recognize */
70 HTMLfont htmlfonts[] =
75 "LucidaSansI", "i", Italic,
81 #define TABLE "<table border=0 cellpadding=0 cellspacing=0>"
84 onattr[8*sizeof(int)] =
86 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 0, 0, 0, 0, 0, 0, 0,
90 "<tt><font size=+1>", /* cw */
91 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent1 */
92 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent2 */
93 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent3 */
97 "<p><font size=+1><b>", /* heading 25 */
98 "<unused>", /* anchor 26 */
102 offattr[8*sizeof(int)] =
104 0, 0, 0, 0, 0, 0, 0, 0,
105 0, 0, 0, 0, 0, 0, 0, 0,
108 "</font></tt>", /* cw */
109 "<-/table>", /* indent1 */
110 "<-/table>", /* indent2 */
111 "<-/table>", /* indent3 */
115 "</b></font>", /* heading 25 */
116 "</a>", /* anchor 26 */
135 Char attr = 0; /* or'ed into each Char */
140 char** anchors; /* allocated in order */
146 char *title = "Plan 9 man page";
148 void process(Biobuf*, char*);
149 void mountfont(int, char*);
150 void switchfont(int);
162 sysfatal("malloc failed: %r");
167 erealloc(void *p, ulong n)
172 sysfatal("realloc failed: %r");
183 sysfatal("strdup failed: %r");
190 fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n");
195 hccmp(const void *va, const void *vb)
201 return a->value - b->value;
205 main(int argc, char *argv[])
211 for(i=0; i<nelem(htmlchars); i++){
212 chartorune(&r, htmlchars[i].utf);
213 htmlchars[i].value = r;
215 qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);
230 Binit(&bout, 1, OWRITE);
233 Binit(&in, 0, OREAD);
234 process(&in, "<stdin>");
237 for(i=0; i<argc; i++){
238 inp = Bopen(argv[i], OREAD);
240 sysfatal("can't open %s: %r", argv[i]);
241 process(inp, argv[i]);
253 if(nalloc == nchars){
255 chars = realloc(chars, nalloc*sizeof(chars[0]));
257 sysfatal("malloc failed: %r");
267 * Close man page references early, so that
269 * doesn't make the comma part of the link.
272 attr &= ~(1<<Anchor);
286 iputrune(Biobuf *b, Rune r)
290 if(linelen++ > 60 && r == ' ')
294 for(i=0; i<indentlevel; i++)
301 iputs(Biobuf *b, char *s)
303 if(s[0]=='<' && s[1]=='+'){
305 Bprint(b, "<%s", s+2);
308 }else if(s[0]=='<' && s[1]=='-'){
311 Bprint(b, "<%s", s+2);
326 /* walk up the nest stack until we reach something we need to turn off. */
327 for(i=0; i<nnest; i++)
331 /* turn off everything above that */
332 for(j=nnest-1; j>=i; j--)
333 iputs(&bout, offattr[nest[j]]);
335 /* turn on everything we just turned off but didn't want to */
336 for(j=i; j<nnest; j++)
338 iputs(&bout, onattr[nest[j]]);
342 /* shift the zeros (turned off things) up */
343 for(i=j=0; i<nnest; i++)
348 /* now turn on the new attributes */
349 for(i=0; i<nelem(attrorder); i++){
353 onattr[j] = anchors[nanchors++];
354 iputs(&bout, onattr[j]);
355 if(nnest >= nelem(nest))
356 sysfatal("nesting too deep");
370 for(i=0; i<nchars; i++){
373 /* next word is string to print */
374 iputs(&bout, (char*)chars[++i]);
378 iputrune(&bout, '\n');
379 iputs(&bout, TABLE "<tr height=5><td></table>");
380 iputrune(&bout, '\n');
386 * If we're going to turn something off after a space,
387 * let's just turn it off before.
389 if(c == ' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32)
390 a ^= a & ~chars[i+1];
392 iputrune(&bout, c & 0xFFFF);
399 Bprint(&bout, "<head>\n");
400 Bprint(&bout, "<title>%s</title>\n", s);
401 Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n");
402 Bprint(&bout, "</head>\n");
403 Bprint(&bout, "<body bgcolor=#ffffff>\n");
409 Bprint(&bout, "</body></html>\n");
431 for(i=0; i<sizeof buf; i++){
466 for(i=0; i<sizeof buf; i++){
467 /* must get bytes not runes */
473 if(c == '\n' || c==' ' || c=='\t'){
483 setnum(Biobuf *b, char *name, int min, int max)
489 fprint(2, "set %s = %d\n", name, i);
492 sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno);
499 char *p, *fld[16], buf[1024];
505 sysfatal("xcmd error: %r");
507 fprint(2, "x command '%s'\n", p);
508 nfld = tokenize(p, fld, nelem(fld));
518 sysfatal("font %d out of range at %s:#%d", i, filename, cno);
519 mountfont(i, fld[2]);
525 if(nfld<2 || atoi(fld[1])!=res)
526 sysfatal("typesetter has unexpected resolution %s", fld[1]? fld[1] : "<unspecified>");
535 if(nfld!=2 || strcmp(fld[1], "utf")!=0)
536 sysfatal("output for unknown typesetter type %s", fld[1]);
539 if(nfld<3 || strcmp(fld[1], "html")!=0)
541 /* is it a man reference of the form cp(1)? */
542 /* X manref start/end cp (1) */
543 if(nfld==6 && strcmp(fld[2], "manref")==0){
544 /* was the right macro; is it the right form? */
545 if(strlen(fld[5])>=3 &&
546 fld[5][0]=='(' && fld[5][2]==')' &&
547 '0'<=fld[5][1] && fld[5][1]<='9'){
548 if(strcmp(fld[3], "start") == 0){
549 /* set anchor attribute and remember string */
551 snprint(buf, sizeof buf,
552 "<a href=\"/magic/man2html/%c/%s\">",
555 anchors = erealloc(anchors, nanchors*sizeof(char*));
556 anchors[nanchors-1] = estrdup(buf);
557 }else if(strcmp(fld[3], "end") == 0)
558 attr &= ~(1<<Anchor);
560 }else if(strcmp(fld[2], "manPP") == 0){
563 }else if(nfld<4 || strcmp(fld[2], "manref")!=0){
564 if(nfld>2 && strcmp(fld[2], "<P>")==0){ /* avoid triggering extra <br> */
566 /* clear all font attributes before paragraph */
567 emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))));
569 /* next emitted char will turn font attributes back on */
570 }else if(nfld>2 && strcmp(fld[2], "<H4>")==0)
571 attr |= (1<<Heading);
572 else if(nfld>2 && strcmp(fld[2], "</H4>")==0)
573 attr &= ~(1<<Heading);
575 fprint(2, "unknown in-line html %s... at %s:%#d\n",
576 fld[2], filename, cno);
581 fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
585 lookup(int c, Htmlchar tab[], int ntab)
593 if(c < tab[mid].value)
595 else if(c > tab[mid].value)
600 return -1; /* no match */
609 i = lookup(r, htmlchars, nelem(htmlchars));
611 emitstr(htmlchars[i].name);
621 for(i=0; troffchars[i].name!=nil; i++)
622 if(strcmp(s, troffchars[i].name) == 0)
623 return troffchars[i].value;
636 /* these most peculiar numbers appear in the troff -man output */
637 nind = ((prevlineH-1*res)+323)/324;
638 attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
640 attr |= (1<<Indent1);
642 attr |= (1<<Indent2);
644 attr |= (1<<Indent3);
651 process(Biobuf *b, char *name)
663 /* go to ground state */
669 case '0': case '1': case '2': case '3': case '4':
670 case '5': case '6': case '7': case '8': case '9':
674 sysfatal("illegal character motion at %s:#%d", filename, cno);
677 /* fall through to character case */
684 /* draw line; ignore */
687 while(c!='\n' && c!= Beof);
690 v = setnum(b, "font", 0, Nfont);
694 v = setnum(b, "hpos", -20000, 20000);
695 /* generate spaces if motion is large and within a line */
696 if(!atnewline && v>2*72)
702 setnum(b, "n1", -10000, 10000);
703 //Bprint(&bout, " N1=%d", v);
704 getc(b); /* space separates */
705 setnum(b, "n2", -10000, 10000);
707 if(!didP && hp < (Wid-1)*res) /* if line is less than 19" long, probably need a line break */
712 page = setnum(b, "ps", -10000, 10000);
715 ps = setnum(b, "ps", 1, 1000);
718 vp += setnum(b, "vpos", -10000, 10000);
719 /* BUG: ignore motion */
730 emitstr(troffchar(p));
733 hp = setnum(b, "hpos", 0, 20000);
734 //Bprint(&bout, " H=%d ", hp);
737 vp = setnum(b, "vpos", 0, 10000);
740 fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
751 for(i=0; htmlfonts[i].name!=nil; i++)
752 if(strcmp(name, htmlfonts[i].name) == 0)
753 return &htmlfonts[i];
754 return &htmlfonts[0];
758 mountfont(int pos, char *name)
761 fprint(2, "mount font %s on %d\n", name, pos);
762 if(font[pos] != nil){
763 free(font[pos]->name);
766 font[pos] = emalloc(sizeof(Font));
767 font[pos]->name = estrdup(name);
768 font[pos]->htmlfont = htmlfont(name);
777 fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name);
780 hf = font[ft]->htmlfont;
782 attr &= ~(1<<hf->bit);
784 hf = font[ft]->htmlfont;
786 attr |= (1<<hf->bit);