]> git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/cmd/html2ms.c
webfs(4): document -d and -D flags
[plan9front.git] / sys / src / cmd / html2ms.c
index fd175d72f835e4318913ecd4ea43ff7783d46903..7f6e0825bc9781236c6d310d63e7bed59062f0ec 100644 (file)
@@ -23,53 +23,103 @@ struct Tag {
        void    (*close)(Text *, Tag *);
        union {
                void    *aux;
-               int     restore;
        };
 };
 
+/* formatter state: current troff font and size plus the pending output buffer */
 struct Text {
-       char    font;
+       char*   fontstyle;      /* font name last selected via fontstyle(), e.g. "R", "I" */
+       char*   fontsize;       /* size macro last selected via fontsize(), e.g. "NL", "SM" */
        int     pre;
        int     pos;
        int     space;
        int     output;
+
+       char    *bp;            /* start of the output buffer grown by emitbuf() */
+       char    *wp;            /* write position: next free byte in the buffer */
+       int     nb;             /* allocated size of the buffer in bytes */
 };
 
 void eatwhite(void);
-Tag *parsetext(Text *, Tag *);
+void parsetext(Text *, Tag *);
 int parsetag(Tag *);
 int parseattr(Attr *);
+void flushtext(Text *);
+char* getattr(Tag *, char *);
+int gotattr(Tag *, char *, char *);
+int gotstyle(Tag *, char *, char *);
+void reparent(Text *, Tag *, Tag *);
+void debugtag(Tag *, char *);
+
+Biobuf in;
+
+/*
+ * Append nbuf bytes from buf to the output buffer of text.
+ * The buffer is grown with realloc when the free space is too
+ * small: to 4096 bytes beyond the current contents for small
+ * writes, or to exactly fit nbuf more bytes for larger ones.
+ * Running out of memory is fatal; a non-positive nbuf is a no-op.
+ */
+void
+emitbuf(Text *text, char *buf, int nbuf)
+{
+       char *p;
+       int nw, nb;
+
+       if(nbuf <= 0)
+               return;
+       nw = text->wp - text->bp;       /* bytes already buffered */
+       if((text->nb - nw) < nbuf){
+               nb = nw + (nbuf < 4096 ? 4096 : nbuf);
+               /* keep the old pointer until realloc is known to have worked */
+               p = realloc(text->bp, nb);
+               if(p == nil)
+                       sysfatal("realloc: %r");
+               text->bp = p;
+               text->nb = nb;
+               text->wp = text->bp + nw;
+       }
+       memmove(text->wp, buf, nbuf);
+       text->wp += nbuf;
+}
 
-Biobuf in, out;
+/*
+ * Append one rune to the output buffer as UTF-8 and keep the
+ * line bookkeeping current: a CR or newline resets pos and space,
+ * any other rune advances pos by one.
+ */
+void
+emitrune(Text *text, Rune r)
+{
+       char utf[UTFmax+1];
+       int n;
+
+       switch(r){
+       case '\r':
+       case '\n':
+               text->pos = 0;
+               text->space = 0;
+               break;
+       default:
+               text->pos++;
+               break;
+       }
+       n = runetochar(utf, &r);
+       emitbuf(text, utf, n);
+}
 
+/*
+ * Format fmt with its arguments and append the result to the
+ * output buffer one rune at a time through emitrune, which keeps
+ * text->pos and text->space up to date.  A format that starts
+ * with '.' is first moved to the start of a line by emitting a
+ * newline when the current line is not empty.
+ * The formatted text is built in a 64-rune buffer (buf).
+ */
 void
 emit(Text *text, char *fmt, ...)
 {
+       Rune buf[64];
        va_list a;
+       int i;
 
-       if(text->pos > 0){
-               text->pos = 0;
-               Bputc(&out, '\n');
-       }
+       if(fmt[0] == '.' && text->pos)
+               emitrune(text, '\n');
        va_start(a, fmt);
-       Bvprint(&out, fmt, a);
+       runevsnprint(buf, nelem(buf), fmt, a);
        va_end(a);
+       for(i=0; buf[i]; i++)
+               emitrune(text, buf[i]);
 }
 
+/* close handler installed by ongarbage: turns text output back on */
 void
-restoreoutput(Text *text, Tag *tag)
+restoreoutput(Text *text, Tag *)
 {
-       text->output = tag->restore;
+       text->output = 1;
 }
 
+/*
+ * Open handler for head, script and style (see ontag): turns
+ * output off until the tag closes.  When output is already off
+ * no close handler is installed, so only the outermost such tag
+ * turns output back on.
+ */
 void
 ongarbage(Text *text, Tag *tag)
 {
-       tag->restore = text->output;
+       if(text->output == 0)
+               return;
        tag->close = restoreoutput;
        text->output = 0;
 }
 
+/*
+ * Open handler for img, link and meta (see ontag): marks the tag
+ * as closing, so parsetext does not read any content for it.
+ */
+void
+onmeta(Text *, Tag *tag)
+{
+       tag->closing = 1;
+}
+
 void
 onp(Text *text, Tag *)
 {
@@ -77,16 +127,17 @@ onp(Text *text, Tag *)
 }
 
+/* close handler installed by onpre: leaves preformatted mode and emits .DE */
 void
-restorepre(Text *text, Tag *tag)
+restorepre(Text *text, Tag *)
 {
-       text->pre = tag->restore;
+       text->pre = 0;
        emit(text, ".DE\n");
 }
 
 void
 onpre(Text *text, Tag *tag)
 {
-       tag->restore = text->pre;
+       if(text->pre)
+               return;
        tag->close = restorepre;
        text->pre = 1;
        emit(text, ".DS L\n");
@@ -106,7 +157,7 @@ onli(Text *text, Tag *tag)
+/* open handler for h1..h6 (see ontag): emits .SH and installs onp as the close handler */
 void
 onh(Text *text, Tag *tag)
 {
-       emit(text, ".SH %c\n", tag->tag[1]);
+       emit(text, ".SH\n");
        tag->close = onp;
 }
 
@@ -120,45 +171,416 @@ onbr(Text *text, Tag *tag)
 }
 
+/*
+ * Select the font named by style (e.g. "R", "I", "B" or "C") by
+ * emitting \f followed by the name, and remember it as current.
+ * Nothing is emitted when style is already the current font.
+ */
 void
-restorefont(Text *text, Tag *tag)
+fontstyle(Text *text, char *style)
+{
+       if(strcmp(text->fontstyle, style) != 0){
+               text->fontstyle = style;
+               emit(text, "\\f%s", style);
+       }
+}
+
+/*
+ * Select the size macro named by size ("NL", "SM" or "LG" in
+ * this file) by emitting it as a request on its own line, and
+ * remember it as current.  Nothing is emitted when size is
+ * already the current one.
+ */
+void
+fontsize(Text *text, char *size)
+{
+       if(strcmp(text->fontsize, size) != 0){
+               text->fontsize = size;
+               emit(text, ".%s\n", size);
+       }
+}
+
+/* close handler: re-selects the font name that the open handler saved in tag->aux */
+void
+restorefontstyle(Text *text, Tag *tag)
+{
+       fontstyle(text, tag->aux);
+}
+
+/* close handler: re-selects the size macro that the open handler saved in tag->aux */
+void
+restorefontsize(Text *text, Tag *tag)
+{
+       fontsize(text, tag->aux);
+}
+
+/*
+ * Open handler for the italic tags listed in ontag (also used by
+ * onspan): switches to font "I" and arranges for the previous
+ * font to be restored when the tag closes.
+ */
+void
+oni(Text *text, Tag *tag)
+{
+       char *prev;
+
+       prev = text->fontstyle;
+       fontstyle(text, "I");
+       tag->aux = prev;
+       tag->close = restorefontstyle;
+}
+
+/*
+ * Open handler for the bold tags listed in ontag (also used by
+ * onspan): switches to font "B" and arranges for the previous
+ * font to be restored when the tag closes.
+ */
+void
+onb(Text *text, Tag *tag)
+{
+       char *prev;
+
+       prev = text->fontstyle;
+       fontstyle(text, "B");
+       tag->aux = prev;
+       tag->close = restorefontstyle;
+}
+
+void onsmall(Text *text, Tag *tag);
+void onsup(Text *text, Tag *tag);
+
+/*
+ * Emits the \v'0.5' escape.  Used in two roles: as the open
+ * handler for a tag named "sub", where it also emits \x'0.5' and
+ * switches to the small size through onsmall, and as the close
+ * handler installed by onsup, where it restores the size saved in
+ * tag->aux.  In both cases onsup is installed as close handler.
+ */
+void
+onsub(Text *text, Tag *tag)
+{
+       emit(text, "\\v\'0.5\'");
+       if(cistrcmp(tag->tag, "sub") != 0)
+               restorefontsize(text, tag);
+       else {
+               emit(text, "\\x\'0.5\'");
+               onsmall(text, tag);
+       }
+       tag->close = onsup;
+}
+
+/*
+ * Emits the \v'-0.5' escape.  Used in two roles: as the open
+ * handler for a tag named "sup", where it also emits \x'-0.5' and
+ * switches to the small size through onsmall, and as the close
+ * handler installed by onsub, where it restores the size saved in
+ * tag->aux.  In both cases onsub is installed as close handler.
+ */
+void
+onsup(Text *text, Tag *tag)
+{
+       emit(text, "\\v\'-0.5\'");
+       if(cistrcmp(tag->tag, "sup") != 0)
+               restorefontsize(text, tag);
+       else {
+               emit(text, "\\x\'-0.5\'");
+               onsmall(text, tag);
+       }
+       tag->close = onsub;
+}
+
+/*
+ * Poor man's CSS handler: an opening span whose class attribute
+ * is "bold", "italic", "subscript" or "superscript" is treated
+ * like b, i, sub or sup.  For the last two the tag name is
+ * temporarily changed so that onsub/onsup take their opening
+ * branch, then set back to "span".  The first class attribute
+ * with a recognized value wins; anything else is ignored.
+ */
+void
+onspan(Text *text, Tag *tag)
+{
+       char *cls;
+       int i;
+
+       if(!tag->opening)
+               return;
+
+       for(i = 0; i < tag->nattr; i++){
+               if(cistrcmp(tag->attr[i].attr, "class") != 0)
+                       continue;
+               cls = tag->attr[i].val;
+
+               if(cistrcmp(cls, "bold") == 0){
+                       onb(text, tag);
+                       return;
+               }
+               if(cistrcmp(cls, "italic") == 0){
+                       oni(text, tag);
+                       return;
+               }
+               if(cistrcmp(cls, "subscript") == 0){
+                       strcpy(tag->tag, "sub");
+                       onsub(text, tag);
+                       strcpy(tag->tag, "span");
+                       return;
+               }
+               if(cistrcmp(cls, "superscript") == 0){
+                       strcpy(tag->tag, "sup");
+                       onsup(text, tag);
+                       strcpy(tag->tag, "span");
+                       return;
+               }
+       }
+}
+
+/*
+ * Open handler for the constant-width tags listed in ontag:
+ * switches to font "C" and arranges for the previous font to be
+ * restored when the tag closes.
+ */
+void
+ontt(Text *text, Tag *tag)
+{
+       char *prev;
+
+       prev = text->fontstyle;
+       fontstyle(text, "C");
+       tag->aux = prev;
+       tag->close = restorefontstyle;
+}
+
+/*
+ * Open handler for small (also called from onsub and onsup):
+ * switches to size "SM" and arranges for the previous size to be
+ * restored when the tag closes.
+ */
+void
+onsmall(Text *text, Tag *tag)
+{
+       char *prev;
+
+       prev = text->fontsize;
+       fontsize(text, "SM");
+       tag->aux = prev;
+       tag->close = restorefontsize;
+}
+
+/*
+ * Open handler for big: switches to size "LG" and arranges for
+ * the previous size to be restored when the tag closes.
+ */
+void
+onbig(Text *text, Tag *tag)
+{
+       char *prev;
+
+       prev = text->fontsize;
+       fontsize(text, "LG");
+       tag->aux = prev;
+       tag->close = restorefontsize;
+}
+
+/*
+ * Close handler installed by onquote: a q tag gets its closing
+ * double quote, then .QE is emitted for both q and blockquote.
+ */
+void
+endquote(Text *text, Tag *tag)
+{
+       int inl;
+
+       inl = cistrcmp(tag->tag, "q") == 0;
+       if(inl)
+               emitrune(text, '"');
+       emit(text, ".QE\n");
+}
+
+/*
+ * Open handler for q and blockquote: q emits .QS followed by an
+ * opening double quote, anything else emits .QP.  endquote is
+ * installed as the close handler.
+ */
+void
+onquote(Text *text, Tag *tag)
+{
+       if(cistrcmp(tag->tag, "q") != 0)
+               emit(text, ".QP\n");
+       else
+               emit(text, ".QS\n\"");
+       tag->close = endquote;
+}
+
+/*
+ * One table cell.  Cells are collected on a list hanging off the
+ * enclosing table tag's aux pointer and written out by endtable.
+ */
+typedef struct Table Table;
+struct Table
+{
+       char    *bp;            /* cell text, taken over from Text.bp in endcell */
+       int     nb;             /* number of bytes of cell text */
+
+       Table   *next;          /* forward link, filled in by endtable */
+       Table   *prev;          /* cell added before this one; the list is built newest first */
+       int     enclose;        /* write the cell inside a T{ ... T} text block */
+       int     brk;            /* last cell of a row: followed by a newline instead of a tab */
+
+       char    fmt[4];         /* tbl format letters for the cell: "L" or "C", plus "B" for th */
+
+       Text    save;           /* Text state saved by oncell and restored by endcell */
+};
+
+/*
+ * Return the nearest tag named "table" (case-insensitive) on the
+ * path from tag up through its ancestors, tag itself included,
+ * or nil when there is none.
+ */
+Tag*
+tabletag(Tag *tag)
+{
+       for(; tag != nil; tag = tag->up)
+               if(cistrcmp(tag->tag, "table") == 0)
+                       return tag;
+       return nil;
+}
+
+/*
+ * Write the data lines for the cells from s up to, but not
+ * including, e.  Each cell is written as its text (or "\ " when
+ * it is empty), wrapped in T{ ... T} when enclose is set, and is
+ * followed by a newline at the end of a row or a tab otherwise.
+ */
+void
+dumprows(Text *text, Table *s, Table *e)
+{
+       Table *c;
+
+       for(c = s; c != e; c = c->next){
+               if(c->enclose)
+                       emit(text, "T{\n");
+               if(c->nb > 0)
+                       emitbuf(text, c->bp, c->nb);
+               else
+                       emit(text, "\\ ");
+               if(c->enclose)
+                       emit(text, "\nT}");
+               if(c->brk)
+                       emitrune(text, '\n');
+               else
+                       emitrune(text, '\t');
+       }
+}
+
+/*
+ * Close handler for table: writes the cells collected on tag->aux
+ * as one or more .TS ... .TE tables, or hands them over to the
+ * enclosing table when this table is nested inside another one.
+ */
+void
+endtable(Text *text, Tag *tag)
+{
+       int i, cols, rows;
+       Table *t, *h, *s;
+       Tag *tt;
+
+       /* cells were pushed newest first; reverse the list by building the next links */
+       h = nil;
+       t = tag->aux;
+       for(; t; t = t->prev){
+               t->next = h;
+               h = t;
+       }
+
+       /*
+        * nested table case: push our cells, in order, onto the list
+        * of the next table up.  this is the best we can do, tbl
+        * doesn't support nesting.
+        */
+       if(tt = tabletag(tag->up)){
+               while(t = h){
+                       h = h->next;
+                       t->next = nil;
+                       t->prev = tt->aux;
+                       tt->aux = t;
+               }
+               return;
+       }
+
+       /*
+        * cols = number of cells in the longest row.  rows counts the
+        * rows but is not used again in this function.
+        */
+       cols = 0;
+       rows = 0;
+       for(i = 0, t = h; t; t = t->next){
+               i++;
+               if(t->brk){
+                       rows++;
+                       if(i > cols)
+                               cols = i;
+                       i = 0;
+               }
+       }
+
+       /*
+        * pad the first row with empty "L" cells up to cols; the row
+        * break moves to the last cell added.  the loop stops after
+        * the first row, later rows are left as they are.
+        */
+       i = 0;
+       for(t = h; t; t = t->next){
+               i++;
+               if(t->brk){
+                       while(i < cols){
+                               s = mallocz(sizeof(Table), 1);
+                               strcpy(s->fmt, "L");
+                               s->brk = t->brk;
+                               t->brk = 0;
+                               s->next = t->next;
+                               t->next = s;
+                               i++;
+                       }
+                       break;
+               }
+       }
+
+       /*
+        * write the cells in chunks.  each chunk is a .TS block with
+        * one format line per row (one fmt per cell), a line with "."
+        * and then the data from dumprows.  a new chunk is started
+        * once 31 rows have been formatted.
+        */
+       s = h;
+       while(s){
+               emit(text, ".TS\n");
+               if(gotattr(tag, "align", "center"))
+                       emit(text, "center ;\n");
+               i = 0;
+               for(t = s; t; t = t->next){
+                       emit(text, "%s", t->fmt);
+                       if(t->brk){
+                               emitrune(text, '\n');
+                               if(++i > 30){
+                                       t = t->next;
+                                       break;
+                               }
+                       }else
+                               emitrune(text, ' ');
+               }
+               emit(text, ".\n");
+               dumprows(text, s, t);
+               emit(text, ".TE\n");
+               s = t;
+       }
+
+       /* release every cell, including the padding cells added above */
+       while(t = h){
+               h = t->next;
+               free(t->bp);
+               free(t);
+       }
+}
+
+/* open handler for table: starts with an empty cell list in tag->aux and installs endtable */
+void
+ontable(Text *, Tag *tag)
+{
+       tag->aux = nil;
+       tag->close = endtable;
+}
+
+/*
+ * Close handler shared by tr, td and th (installed by oncell).
+ * For tr, the most recently added cell of the enclosing table is
+ * marked as the end of a row.  For td and th, the text buffered
+ * since oncell becomes the cell's content: surrounding white
+ * space is trimmed, the cell is flagged for a T{ T} text block
+ * when it is 32 bytes or longer or contains a tab, CR or newline,
+ * its format letters are chosen, it is pushed on the table's cell
+ * list and the Text state saved by oncell is restored.
+ */
+void
+endcell(Text *text, Tag *tag)
 {
-       text->font = tag->restore;
-       text->pos += Bprint(&out, "\\f%c", text->font);
+       Table *t;
+       Tag *tt;
+       int i;
+
+       if((tt = tabletag(tag)) == nil)
+               return;
+       if(cistrcmp(tag->tag, "tr") == 0){
+               if(t = tt->aux)
+                       t->brk = 1;
+       } else {
+               /* the cell takes over the buffer filled while the cell was open */
+               t = tag->aux;
+               t->bp = text->bp;
+               t->nb = text->wp - text->bp;
+
+               /* strip leading white space by moving the rest down */
+               for(i=0; i<t->nb; i++)
+                       if(strchr(" \t\r\n", t->bp[i]) == nil)
+                               break;
+               if(i > 0){
+                       memmove(t->bp, t->bp+i, t->nb - i);
+                       t->nb -= i;
+               }
+               /* strip trailing white space */
+               while(t->nb > 0 && strchr(" \t\r\n", t->bp[t->nb-1]))
+                       t->nb--;
+               if(t->nb < 32){
+                       for(i=0; i<t->nb; i++)
+                               if(strchr("\t\r\n", t->bp[i]))
+                                       break;
+                       t->enclose = i < t->nb;
+               } else {
+                       t->enclose = 1;
+               }
+               if(gotstyle(tag, "text-align", "center") || gotstyle(tt, "text-align", "center"))
+                       strcpy(t->fmt, "C");
+               else
+                       strcpy(t->fmt, "L");
+               /*
+                * tag names are compared without regard to case everywhere
+                * else in this file, so <TH> has to get the B as well.
+                */
+               if(cistrcmp(tag->tag, "th") == 0)
+                       strcpy(t->fmt+1, "B");
+               t->prev = tt->aux;
+               tt->aux = t;
+               *text = t->save;
+       }
 }
 
+/*
+ * Open handler shared by tr, td and th.  Does nothing when the
+ * tag is not inside a table.  A tr is reparented directly under
+ * its table.  A td or th is reparented under the nearest
+ * enclosing tr (and ignored when there is none); a Table cell is
+ * allocated for it, the current Text state is saved in the cell
+ * and text is switched to a fresh, empty buffer so that the
+ * cell's content is collected separately until endcell runs.
+ */
 void
-onfont(Text *text, Tag *tag)
+oncell(Text *text, Tag *tag)
 {
-       if(text->font == 0)
-               text->font = 'R';
-       tag->restore = text->font;
-       tag->close = restorefont;
-       if(cistrcmp(tag->tag, "i") == 0)
-               text->font = 'I';
-       else if(cistrcmp(tag->tag, "b") == 0)
-               text->font = 'B';
-       text->pos += Bprint(&out, "\\f%c", text->font);
+       Tag *tt;
+
+       if((tt = tabletag(tag)) == nil)
+               return;
+       if(cistrcmp(tag->tag, "tr")){
+               Table *t;
+
+               /* td or th: find the row this cell belongs to */
+               tt = tag->up;
+               while(tt && cistrcmp(tt->tag, "tr"))
+                       tt = tt->up;
+               if(tt == nil)
+                       return;
+               reparent(text, tag, tt);
+
+               t = mallocz(sizeof(*t), 1);
+               t->save = *text;
+               tag->aux = t;
+
+               /* start an empty buffer; endcell hands it to the cell */
+               text->bp = nil;
+               text->wp = nil;
+               text->nb = 0;
+               text->pos = 0;
+               text->space = 0;
+       } else
+               reparent(text, tag, tt);
+       tag->close = endcell;
 }
 
+/*
+ * Open handlers by tag name.  parsetext searches this table from
+ * the top with a case-insensitive comparison and calls the first
+ * matching handler; tags not listed here get no handler.
+ */
 struct {
        char    *tag;
        void    (*open)(Text *, Tag *);
 } ontag[] = {
+       "b",            onb,
+       "big",          onbig,
+       "blockquote",   onquote,
        "br",           onbr,
-       "hr",           onbr,
-       "b",            onfont,
-       "i",            onfont,
-       "p",            onp,
+       "cite",         oni,
+       "code",         ontt,
+       "dfn",          oni,
+       "em",           oni,
        "h1",           onh,
        "h2",           onh,
        "h3",           onh,
        "h4",           onh,
        "h5",           onh,
+       "h6",           onh,
+       "head",         ongarbage,
+       "hr",           onbr,
+       "i",            oni,
+       "img",          onmeta,
+       "kbd",          ontt,
        "li",           onli,
+       "link",         onmeta,
+       "meta",         onmeta,
+       "p",            onp,
        "pre",          onpre,
-       "head",         ongarbage,
-       "style",        ongarbage,
+       "q",            onquote,
+       "samp",         ontt,
        "script",       ongarbage,
+       "small",        onsmall,
+       "strong",       onb,
+       "style",        ongarbage,
+       "table",        ontable,
+       "td",           oncell,
+       "th",           oncell,
+       "tr",           oncell,
+       "sub",          onsub,
+       "sup",          onsup,
+       "span",         onspan,
+       "tt",           ontt,
+       "var",          oni,
 };
 
 void
@@ -198,10 +620,13 @@ parsecomment(void)
                        if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
                                continue;
                        while((c = Bgetc(&in)) > 0){
-                               if(c == ']')
-                                       if(Bgetc(&in) == ']')
-                                               if(Bgetc(&in) == '>')
-                                                       return;
+                               if(c == ']'){
+                                       if(Bgetc(&in) == ']'){
+                                               if(Bgetc(&in) != '>')
+                                                       Bungetc(&in);
+                                               return;
+                                       }
+                               }
                        }
                }
        }
@@ -300,7 +725,7 @@ Rune
 parserune(int c)
 {
        char buf[10];
-       int i, n;
+       int n;
        Rune r;
 
        n = 0;
@@ -324,6 +749,8 @@ parserune(int c)
                        return '>';
                if(strcmp(buf, "quot") == 0)
                        return '"';
+               if(strcmp(buf, "apos") == 0)
+                       return '\'';
                if(strcmp(buf, "amp") == 0)
                        return '&';
                /* use tcs -f html to handle the rest. */
@@ -359,7 +786,11 @@ substrune(Rune r)
 void
 debugtag(Tag *tag, char *dbg)
 {
-       if(1) return;
+       if(1){
+               USED(tag);
+               USED(dbg);
+               return;
+       }
 
        if(tag == nil)
                return;
@@ -367,48 +798,115 @@ debugtag(Tag *tag, char *dbg)
        fprint(2, "%s %s%s", tag->tag, dbg ? dbg : " > ", dbg ? "\n" : "");
 }
 
+/*
+ * Return the value of the first attribute of tag whose name
+ * matches attr without regard to case, or nil when there is none.
+ */
+char*
+getattr(Tag *tag, char *attr)
+{
+       int i;
 
-Tag*
+       for(i=0; i<tag->nattr; i++)
+               if(cistrcmp(tag->attr[i].attr, attr) == 0)
+                       return tag->attr[i].val;
+       return nil;
+}
+
+/*
+ * Return 1 when tag has the attribute attr and its value contains
+ * val as a substring (compared without regard to case), else 0.
+ */
+int
+gotattr(Tag *tag, char *attr, char *val)
+{
+       char *have;
+
+       have = getattr(tag, attr);
+       if(have == nil)
+               return 0;
+       if(cistrstr(have, val) == nil)
+               return 0;
+       return 1;
+}
+
+/*
+ * Crude test of the style attribute of tag: finds the first
+ * occurrence of style anywhere in the attribute value (substring
+ * match, case ignored), skips to the next ':' and any blanks or
+ * tabs after it, and returns 1 when the text there starts with
+ * val (case ignored).  Returns 0 in every other case.
+ */
+int
+gotstyle(Tag *tag, char *style, char *val)
+{
+       char *p;
+
+       p = getattr(tag, "style");
+       if(p == nil)
+               return 0;
+       p = cistrstr(p, style);
+       if(p == nil)
+               return 0;
+       for(p += strlen(style); *p != '\0' && *p != ':'; p++)
+               ;
+       if(*p != ':')
+               return 0;
+       for(p++; *p != '\0' && strchr("\t ", *p) != nil; p++)
+               ;
+       return cistrncmp(p, val, strlen(val)) == 0;
+}
+
+/*
+ * Make up the parent of tag.  Every tag between tag and up (from
+ * tag's current parent upwards, up itself excluded) is closed on
+ * the way: its close handler, if any, is called once and cleared.
+ * up is expected to be reachable through tag's up links.
+ */
+void
+reparent(Text *text, Tag *tag, Tag *up)
+{
+       Tag *t;
+
+       for(t = tag->up; t != up; t = t->up){
+               debugtag(t, "reparent");
+               if(t->close == nil)
+                       continue;
+               t->close(text, t);
+               t->close = nil;
+       }
+       tag->up = up;
+}
+
+
+void
 parsetext(Text *text, Tag *tag)
 {
-       Tag *rtag;
+       int hidden, c;
+       Tag t, *up;
        Rune r;
-       int c;
 
-       rtag = tag;
-       debugtag(tag, "open");
+       if(tag){
+               up = tag->up;
+               debugtag(tag, "open");
+               for(c = 0; c < nelem(ontag); c++){
+                       if(cistrcmp(tag->tag, ontag[c].tag) == 0){
+                               ontag[c].open(text, tag);
+                               break;
+                       }
+               }
+               hidden = getattr(tag, "hidden") || gotstyle(tag, "display", "none");
+       } else {
+               up = nil;
+               hidden = 0;
+       }
        if(tag == nil || tag->closing == 0){
                while((c = Bgetc(&in)) > 0){
                        if(c == '<'){
-                               Tag t;
-
                                memset(&t, 0, sizeof(t));
                                if(parsetag(&t)){
                                        if(t.opening){
                                                t.up = tag;
-                                               for(c = 0; c < nelem(ontag); c++){
-                                                       if(cistrcmp(t.tag, ontag[c].tag) == 0){
-                                                               ontag[c].open(text, &t);
-                                                               break;
-                                                       }
-                                               }
-                                               rtag = parsetext(text, &t);
-                                               if(rtag == &t)
-                                                       rtag = tag;
-                                               else
+                                               parsetext(text, &t);
+                                               if(t.up != tag){
+                                                       debugtag(tag, "skip");
+                                                       up = t.up;
                                                        break;
+                                               }
+                                               debugtag(tag, "back");
                                        } else if(t.closing){
-                                               while(rtag && cistrcmp(rtag->tag, t.tag))
-                                                       rtag = rtag->up;
-                                               if(rtag == nil)
-                                                       rtag = tag;
-                                               else
+                                               up = tag;
+                                               while(up && cistrcmp(up->tag, t.tag))
+                                                       up = up->up;
+                                               if(up){
+                                                       up = up->up;
                                                        break;
+                                               }
                                        }
                                }
                                continue;
                        }
-                       if(!text->output)
+                       if(hidden || !text->output)
                                continue;
                        r = substrune(parserune(c));
                        switch(r){
@@ -418,50 +916,53 @@ parsetext(Text *text, Tag *tag)
                        case '\t':
                                if(text->pre == 0){
                                        text->space = 1;
-                                       continue;
+                                       break;
                                }
                        default:
-                               if(r == '\n' || r == '\r')
-                                       text->pos = 0;
                                if(text->space){
-                                       text->space = 0;
-                                       if(text->pos >= 70){
-                                               text->pos = 0;
-                                               Bputc(&out, '\n');
-                                       } else if(text->pos > 0){
-                                               text->pos++;
-                                               Bputc(&out, ' ');
-                                       }
-                               }
-                               if(text->pos == 0 && r == '.'){
-                                       text->pos++;
-                                       Bputc(&out, ' ');
+                                       if(text->pos >= 70)
+                                               emitrune(text, '\n');
+                                       else if(text->pos > 0)
+                                               emitrune(text, ' ');
                                }
-                               text->pos++;
-                               if(r == 0xA0){
+                               if((text->pos == 0 && r == '.') || r == '\\')
+                                       emit(text, "\\&");
+                               if(r == '\\' || r == 0xA0)
+                                       emitrune(text, '\\');
+                               if(r == 0xA0)
                                        r = ' ';
-                                       Bputc(&out, '\\');
-                               }
-                               Bprint(&out, "%C", r);
+                               emitrune(text, r);
+                               text->space = 0;
                        }
                }
        }
-       debugtag(tag, "close");
-       if(tag && tag->close)
-               tag->close(text, tag);
-       return rtag;
+       if(tag){
+               debugtag(tag, "close");
+               if(tag->close){
+                       tag->close(text, tag);
+                       tag->close = nil;
+               }
+               if(up)
+                       tag->up = up;
+       }
+}
+
+/*
+ * Reset text to its initial state: everything zeroed (no buffer
+ * yet), font "R", size "NL" and output turned on.
+ */
+void
+inittext(Text *text)
+{
+       memset(text, 0, sizeof(Text));
+       text->fontstyle = "R";
+       text->fontsize = "NL";
+       text->output = 1;
 }
 
+/*
+ * Read HTML from standard input, convert it into a memory buffer
+ * and write that buffer to standard output in one go.
+ */
 void
 main(void)
 {
        Text text;
-
        Binit(&in, 0, OREAD);
-       Binit(&out, 1, OWRITE);
-
-       memset(&text, 0, sizeof(text));
-       text.output = 1;
+       inittext(&text);
        parsetext(&text, nil);
        emit(&text, "\n");
+       /* a failed or short write would otherwise lose output unnoticed */
+       if(write(1, text.bp, text.wp - text.bp) != text.wp - text.bp)
+               sysfatal("write: %r");
+       /* report success explicitly instead of falling off the end of main */
+       exits(nil);
 }