]> git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/cmd/html2ms.c
fix filetype detecton by suffix so that multiple dots dont confuse it. (thanks kvik)
[plan9front.git] / sys / src / cmd / html2ms.c
index 22983621642e325528985809aa80a7887b0426ae..7f6e0825bc9781236c6d310d63e7bed59062f0ec 100644 (file)
@@ -23,53 +23,103 @@ struct Tag {
        void    (*close)(Text *, Tag *);
        union {
                void    *aux;
-               int     restore;
        };
 };
 
 struct Text {
-       char    font;
+       char*   fontstyle;
+       char*   fontsize;
        int     pre;
        int     pos;
        int     space;
        int     output;
+
+       char    *bp;
+       char    *wp;
+       int     nb;
 };
 
 void eatwhite(void);
-Tag *parsetext(Text *, Tag *);
+void parsetext(Text *, Tag *);
 int parsetag(Tag *);
 int parseattr(Attr *);
+void flushtext(Text *);
+char* getattr(Tag *, char *);
+int gotattr(Tag *, char *, char *);
+int gotstyle(Tag *, char *, char *);
+void reparent(Text *, Tag *, Tag *);
+void debugtag(Tag *, char *);
+
+Biobuf in;
 
-Biobuf in, out;
+void
+emitbuf(Text *text, char *buf, int nbuf)
+{
+       int nw;
+
+       nw = text->wp - text->bp;
+       if((text->nb - nw) < nbuf){
+               if(nbuf < 4096)
+                       text->nb = nw + 4096;
+               else
+                       text->nb = nw + nbuf;
+               text->bp = realloc(text->bp, text->nb);
+               text->wp = text->bp + nw;
+       }
+       memmove(text->wp, buf, nbuf);
+       text->wp += nbuf;
+}
+
+void
+emitrune(Text *text, Rune r)
+{
+       char buf[UTFmax+1];
+
+       if(r == '\r' || r =='\n'){
+               text->pos = 0;
+               text->space = 0;
+       }else
+               text->pos++;
+       emitbuf(text, buf, runetochar(buf, &r));
+}
 
 void
 emit(Text *text, char *fmt, ...)
 {
+       Rune buf[64];
        va_list a;
+       int i;
 
-       if(text->pos > 0){
-               text->pos = 0;
-               Bputc(&out, '\n');
-       }
+       if(fmt[0] == '.' && text->pos)
+               emitrune(text, '\n');
        va_start(a, fmt);
-       Bvprint(&out, fmt, a);
+       runevsnprint(buf, nelem(buf), fmt, a);
        va_end(a);
+       for(i=0; buf[i]; i++)
+               emitrune(text, buf[i]);
 }
 
 void
-restoreoutput(Text *text, Tag *tag)
+restoreoutput(Text *text, Tag *)
 {
-       text->output = tag->restore;
+       text->output = 1;
 }
 
 void
 ongarbage(Text *text, Tag *tag)
 {
-       tag->restore = text->output;
+       if(text->output == 0)
+               return;
        tag->close = restoreoutput;
        text->output = 0;
 }
 
+void
+onmeta(Text *, Tag *tag)
+{
+       tag->closing = 1;
+}
+
 void
 onp(Text *text, Tag *)
 {
@@ -77,16 +127,17 @@ onp(Text *text, Tag *)
 }
 
 void
-restorepre(Text *text, Tag *tag)
+restorepre(Text *text, Tag *)
 {
-       text->pre = tag->restore;
+       text->pre = 0;
        emit(text, ".DE\n");
 }
 
 void
 onpre(Text *text, Tag *tag)
 {
-       tag->restore = text->pre;
+       if(text->pre)
+               return;
        tag->close = restorepre;
        text->pre = 1;
        emit(text, ".DS L\n");
@@ -106,7 +157,7 @@ onli(Text *text, Tag *tag)
 void
 onh(Text *text, Tag *tag)
 {
-       emit(text, ".SH %c\n", tag->tag[1]);
+       emit(text, ".SH\n");
        tag->close = onp;
 }
 
@@ -120,45 +171,416 @@ onbr(Text *text, Tag *tag)
 }
 
 void
-restorefont(Text *text, Tag *tag)
+fontstyle(Text *text, char *style)
+{
+       if(strcmp(text->fontstyle, style) == 0)
+               return;
+       text->fontstyle = style;
+       emit(text, "\\f%s", style);
+}
+
+void
+fontsize(Text *text, char *size)
+{
+       if(strcmp(text->fontsize, size) == 0)
+               return;
+       text->fontsize = size;
+       emit(text, ".%s\n", size);
+}
+
+void
+restorefontstyle(Text *text, Tag *tag)
+{
+       fontstyle(text, tag->aux);
+}
+
+void
+restorefontsize(Text *text, Tag *tag)
+{
+       fontsize(text, tag->aux);
+}
+
+void
+oni(Text *text, Tag *tag)
+{
+       tag->aux = text->fontstyle;
+       tag->close = restorefontstyle;
+       fontstyle(text, "I");
+}
+
+void
+onb(Text *text, Tag *tag)
+{
+       tag->aux = text->fontstyle;
+       tag->close = restorefontstyle;
+       fontstyle(text, "B");
+}
+
+void onsmall(Text *text, Tag *tag);
+void onsup(Text *text, Tag *tag);
+
+void
+onsub(Text *text, Tag *tag)
+{
+       emit(text, "\\v\'0.5\'");
+       if(cistrcmp(tag->tag, "sub") == 0){
+               emit(text, "\\x\'0.5\'");
+               onsmall(text, tag);
+       } else
+               restorefontsize(text, tag);
+       tag->close = onsup;
+}
+
+void
+onsup(Text *text, Tag *tag)
+{
+       emit(text, "\\v\'-0.5\'");
+       if(cistrcmp(tag->tag, "sup") == 0){
+               emit(text, "\\x\'-0.5\'");
+               onsmall(text, tag);
+       }else
+               restorefontsize(text, tag);
+       tag->close = onsub;
+}
+
+/*
+ * this is poor mans CSS handler.
+ */
+void
+onspan(Text *text, Tag *tag)
 {
-       text->font = tag->restore;
-       text->pos += Bprint(&out, "\\f%c", text->font);
+       Attr *a;
+
+       if(!tag->opening)
+               return;
+
+       for(a=tag->attr; a < tag->attr+tag->nattr; a++){
+               if(cistrcmp(a->attr, "class") != 0)
+                       continue;
+
+               if(cistrcmp(a->val, "bold") == 0){
+                       onb(text, tag);
+                       return;
+               }
+               if(cistrcmp(a->val, "italic") == 0){
+                       oni(text, tag);
+                       return;
+               }
+               if(cistrcmp(a->val, "subscript") == 0){
+                       strcpy(tag->tag, "sub");
+                       onsub(text, tag);
+                       strcpy(tag->tag, "span");
+                       return;
+               }
+               if(cistrcmp(a->val, "superscript") == 0){
+                       strcpy(tag->tag, "sup");
+                       onsup(text, tag);
+                       strcpy(tag->tag, "span");
+                       return;
+               }
+       }
 }
 
 void
-onfont(Text *text, Tag *tag)
+ontt(Text *text, Tag *tag)
 {
-       if(text->font == 0)
-               text->font = 'R';
-       tag->restore = text->font;
-       tag->close = restorefont;
-       if(cistrcmp(tag->tag, "i") == 0)
-               text->font = 'I';
-       else if(cistrcmp(tag->tag, "b") == 0)
-               text->font = 'B';
-       text->pos += Bprint(&out, "\\f%c", text->font);
+       tag->aux = text->fontstyle;
+       tag->close = restorefontstyle;
+       fontstyle(text, "C");
+}
+
+void
+onsmall(Text *text, Tag *tag)
+{
+       tag->aux = text->fontsize;
+       tag->close = restorefontsize;
+       fontsize(text, "SM");
+}
+
+void
+onbig(Text *text, Tag *tag)
+{
+       tag->aux = text->fontsize;
+       tag->close = restorefontsize;
+       fontsize(text, "LG");
+}
+
+void
+endquote(Text *text, Tag *tag)
+{
+       if(cistrcmp(tag->tag, "q") == 0)
+               emitrune(text, '"');
+       emit(text, ".QE\n");
+}
+
+void
+onquote(Text *text, Tag *tag)
+{
+       tag->close = endquote;
+       if(cistrcmp(tag->tag, "q") == 0)
+               emit(text, ".QS\n\"");
+       else
+               emit(text, ".QP\n");
+}
+
+typedef struct Table Table;
+struct Table
+{
+       char    *bp;
+       int     nb;
+
+       Table   *next;
+       Table   *prev;
+       int     enclose;
+       int     brk;
+
+       char    fmt[4];
+
+       Text    save;
+};
+
+Tag*
+tabletag(Tag *tag)
+{
+       if(tag == nil)
+               return nil;
+       if(cistrcmp(tag->tag, "table") == 0)
+               return tag;
+       return tabletag(tag->up);
+}
+
+void
+dumprows(Text *text, Table *s, Table *e)
+{
+       
+       for(; s != e; s = s->next){
+               if(s->enclose)
+                       emit(text, "T{\n");
+               if(s->nb <= 0)
+                       emit(text, "\\ ");
+               else
+                       emitbuf(text, s->bp, s->nb);
+               if(s->enclose)
+                       emit(text, "\nT}");
+               emitrune(text, s->brk ? '\n' : '\t');
+       }
+}
+
+void
+endtable(Text *text, Tag *tag)
+{
+       int i, cols, rows;
+       Table *t, *h, *s;
+       Tag *tt;
+
+       /* reverse list */
+       h = nil;
+       t = tag->aux;
+       for(; t; t = t->prev){
+               t->next = h;
+               h = t;
+       }
+
+       /*
+        * nested table case, add our cells to the next table up.
+        * this is the best we can do, tbl doesnt support nesting
+        */
+       if(tt = tabletag(tag->up)){
+               while(t = h){
+                       h = h->next;
+                       t->next = nil;
+                       t->prev = tt->aux;
+                       tt->aux = t;
+               }
+               return;
+       }
+
+       cols = 0;
+       rows = 0;
+       for(i = 0, t = h; t; t = t->next){
+               i++;
+               if(t->brk){
+                       rows++;
+                       if(i > cols)
+                               cols = i;
+                       i = 0;
+               }
+       }
+
+       i = 0;
+       for(t = h; t; t = t->next){
+               i++;
+               if(t->brk){
+                       while(i < cols){
+                               s = mallocz(sizeof(Table), 1);
+                               strcpy(s->fmt, "L");
+                               s->brk = t->brk;
+                               t->brk = 0;
+                               s->next = t->next;
+                               t->next = s;
+                               i++;
+                       }
+                       break;
+               }
+       }
+
+       s = h;
+       while(s){
+               emit(text, ".TS\n");
+               if(gotattr(tag, "align", "center"))
+                       emit(text, "center ;\n");
+               i = 0;
+               for(t = s; t; t = t->next){
+                       emit(text, "%s", t->fmt);
+                       if(t->brk){
+                               emitrune(text, '\n');
+                               if(++i > 30){
+                                       t = t->next;
+                                       break;
+                               }
+                       }else
+                               emitrune(text, ' ');
+               }
+               emit(text, ".\n");
+               dumprows(text, s, t);
+               emit(text, ".TE\n");
+               s = t;
+       }
+
+       while(t = h){
+               h = t->next;
+               free(t->bp);
+               free(t);
+       }
+}
+
+void
+ontable(Text *, Tag *tag)
+{
+       tag->aux = nil;
+       tag->close = endtable;
+}
+
+void
+endcell(Text *text, Tag *tag)
+{
+       Table *t;
+       Tag *tt;
+       int i;
+
+       if((tt = tabletag(tag)) == nil)
+               return;
+       if(cistrcmp(tag->tag, "tr") == 0){
+               if(t = tt->aux)
+                       t->brk = 1;
+       } else {
+               t = tag->aux;
+               t->bp = text->bp;
+               t->nb = text->wp - text->bp;
+
+               for(i=0; i<t->nb; i++)
+                       if(strchr(" \t\r\n", t->bp[i]) == nil)
+                               break;
+               if(i > 0){
+                       memmove(t->bp, t->bp+i, t->nb - i);
+                       t->nb -= i;
+               }
+               while(t->nb > 0 && strchr(" \t\r\n", t->bp[t->nb-1]))
+                       t->nb--;
+               if(t->nb < 32){
+                       for(i=0; i<t->nb; i++)
+                               if(strchr("\t\r\n", t->bp[i]))
+                                       break;
+                       t->enclose = i < t->nb;
+               } else {
+                       t->enclose = 1;
+               }
+               if(gotstyle(tag, "text-align", "center") || gotstyle(tt, "text-align", "center"))
+                       strcpy(t->fmt, "C");
+               else
+                       strcpy(t->fmt, "L");
+               if(strcmp(tag->tag, "th") == 0)
+                       strcpy(t->fmt+1, "B");
+               t->prev = tt->aux;
+               tt->aux = t;
+               *text = t->save;
+       }
+}
+
+void
+oncell(Text *text, Tag *tag)
+{
+       Tag *tt;
+
+       if((tt = tabletag(tag)) == nil)
+               return;
+       if(cistrcmp(tag->tag, "tr")){
+               Table *t;
+
+               tt = tag->up;
+               while(tt && cistrcmp(tt->tag, "tr"))
+                       tt = tt->up;
+               if(tt == nil)
+                       return;
+               reparent(text, tag, tt);
+
+               t = mallocz(sizeof(*t), 1);
+               t->save = *text;
+               tag->aux = t;
+
+               text->bp = nil;
+               text->wp = nil;
+               text->nb = 0;
+               text->pos = 0;
+               text->space = 0;
+       } else
+               reparent(text, tag, tt);
+       tag->close = endcell;
 }
 
 struct {
        char    *tag;
        void    (*open)(Text *, Tag *);
 } ontag[] = {
+       "b",            onb,
+       "big",          onbig,
+       "blockquote",   onquote,
        "br",           onbr,
-       "hr",           onbr,
-       "b",            onfont,
-       "i",            onfont,
-       "p",            onp,
+       "cite",         oni,
+       "code",         ontt,
+       "dfn",          oni,
+       "em",           oni,
        "h1",           onh,
        "h2",           onh,
        "h3",           onh,
        "h4",           onh,
        "h5",           onh,
+       "h6",           onh,
+       "head",         ongarbage,
+       "hr",           onbr,
+       "i",            oni,
+       "img",          onmeta,
+       "kbd",          ontt,
        "li",           onli,
+       "link",         onmeta,
+       "meta",         onmeta,
+       "p",            onp,
        "pre",          onpre,
-       "head",         ongarbage,
-       "style",        ongarbage,
+       "q",            onquote,
+       "samp",         ontt,
        "script",       ongarbage,
+       "small",        onsmall,
+       "strong",       onb,
+       "style",        ongarbage,
+       "table",        ontable,
+       "td",           oncell,
+       "th",           oncell,
+       "tr",           oncell,
+       "sub",          onsub,
+       "sup",          onsup,
+       "span",         onspan,
+       "tt",           ontt,
+       "var",          oni,
 };
 
 void
@@ -198,10 +620,13 @@ parsecomment(void)
                        if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
                                continue;
                        while((c = Bgetc(&in)) > 0){
-                               if(c == ']')
-                                       if(Bgetc(&in) == ']')
-                                               if(Bgetc(&in) == '>')
-                                                       return;
+                               if(c == ']'){
+                                       if(Bgetc(&in) == ']'){
+                                               if(Bgetc(&in) != '>')
+                                                       Bungetc(&in);
+                                               return;
+                                       }
+                               }
                        }
                }
        }
@@ -296,90 +721,17 @@ parsetag(Tag *t)
        return n > 0;
 }
 
-struct {
-       char    *entity;
-       Rune    rune;
-} entities[] = {
-       "AElig", 198,   "Aacute", 193,  "Acirc", 194,   "Agrave", 192,  
-       "Alpha", 913,   "Aring", 197,   "Atilde", 195,  "Auml", 196,    
-       "Beta", 914,    "Ccedil", 199,  "Chi", 935,     "Dagger", 8225, 
-       "Delta", 916,   "ETH", 208,     "Eacute", 201,  "Ecirc", 202,   
-       "Egrave", 200,  "Epsilon", 917, "Eta", 919,     "Euml", 203,    
-       "Gamma", 915,   "Iacute", 205,  "Icirc", 206,   "Igrave", 204,  
-       "Iota", 921,    "Iuml", 207,    "Kappa", 922,   "Lambda", 923,  
-       "Mu", 924,      "Ntilde", 209,  "Nu", 925,      "OElig", 338,   
-       "Oacute", 211,  "Ocirc", 212,   "Ograve", 210,  "Omega", 937,   
-       "Omicron", 927, "Oslash", 216,  "Otilde", 213,  "Ouml", 214,    
-       "Phi", 934,     "Pi", 928,      "Prime", 8243,  "Psi", 936,     
-       "Rho", 929,     "Scaron", 352,  "Sigma", 931,   "THORN", 222,   
-       "Tau", 932,     "Theta", 920,   "Uacute", 218,  "Ucirc", 219,   
-       "Ugrave", 217,  "Upsilon", 933, "Uuml", 220,    "Xi", 926,      
-       "Yacute", 221,  "Yuml", 376,    "Zeta", 918,    "aacute", 225,  
-       "acirc", 226,   "acute", 180,   "aelig", 230,   "agrave", 224,  
-       "alefsym", 8501,"alpha", 945,   "amp", 38,      "and", 8743,    
-       "ang", 8736,    "aring", 229,   "asymp", 8776,  "atilde", 227,  
-       "auml", 228,    "bdquo", 8222,  "beta", 946,    "brvbar", 166,  
-       "bull", 8226,   "cap", 8745,    "ccedil", 231,  "cdots", 8943,  
-       "cedil", 184,   "cent", 162,    "chi", 967,     "circ", 710,    
-       "clubs", 9827,  "cong", 8773,   "copy", 169,    "crarr", 8629,  
-       "cup", 8746,    "curren", 164,  "dArr", 8659,   "dagger", 8224, 
-       "darr", 8595,   "ddots", 8945,  "deg", 176,     "delta", 948,   
-       "diams", 9830,  "divide", 247,  "eacute", 233,  "ecirc", 234,   
-       "egrave", 232,  "emdash", 8212, "empty", 8709,  "emsp", 8195,   
-       "endash", 8211, "ensp", 8194,   "epsilon", 949, "equiv", 8801,  
-       "eta", 951,     "eth", 240,     "euml", 235,    "euro", 8364,   
-       "exist", 8707,  "fnof", 402,    "forall", 8704, "frac12", 189,  
-       "frac14", 188,  "frac34", 190,  "frasl", 8260,  "gamma", 947,   
-       "ge", 8805,     "gt", 62,       "hArr", 8660,   "harr", 8596,   
-       "hearts", 9829, "hellip", 8230, "iacute", 237,  "icirc", 238,   
-       "iexcl", 161,   "igrave", 236,  "image", 8465,  "infin", 8734,  
-       "int", 8747,    "iota", 953,    "iquest", 191,  "isin", 8712,   
-       "iuml", 239,    "kappa", 954,   "lArr", 8656,   "lambda", 955,  
-       "lang", 9001,   "laquo", 171,   "larr", 8592,   "lceil", 8968,  
-       "ldots", 8230,  "ldquo", 8220,  "le", 8804,     "lfloor", 8970, 
-       "lowast", 8727, "loz", 9674,    "lrm", 8206,    "lsaquo", 8249, 
-       "lsquo", 8216,  "lt", 60,       "macr", 175,    "mdash", 8212,  
-       "micro", 181,   "middot", 183,  "minus", 8722,  "mu", 956,      
-       "nabla", 8711,  "nbsp", 160,    "ndash", 8211,  "ne", 8800,     
-       "ni", 8715,     "not", 172,     "notin", 8713,  "nsub", 8836,   
-       "ntilde", 241,  "nu", 957,      "oacute", 243,  "ocirc", 244,   
-       "oelig", 339,   "ograve", 242,  "oline", 8254,  "omega", 969,   
-       "omicron", 959, "oplus", 8853,  "or", 8744,     "ordf", 170,    
-       "ordm", 186,    "oslash", 248,  "otilde", 245,  "otimes", 8855, 
-       "ouml", 246,    "para", 182,    "part", 8706,   "permil", 8240, 
-       "perp", 8869,   "phi", 966,     "pi", 960,      "piv", 982,     
-       "plusmn", 177,  "pound", 163,   "prime", 8242,  "prod", 8719,   
-       "prop", 8733,   "psi", 968,     "quad", 8193,   "quot", 34,     
-       "rArr", 8658,   "radic", 8730,  "rang", 9002,   "raquo", 187,   
-       "rarr", 8594,   "rceil", 8969,  "rdquo", 8221,  "real", 8476,   
-       "reg", 174,     "rfloor", 8971, "rho", 961,     "rlm", 8207,    
-       "rsaquo", 8250, "rsquo", 8217,  "sbquo", 8218,  "scaron", 353,  
-       "sdot", 8901,   "sect", 167,    "shy", 173,     "sigma", 963,   
-       "sigmaf", 962,  "sim", 8764,    "sp", 8194,     "spades", 9824, 
-       "sub", 8834,    "sube", 8838,   "sum", 8721,    "sup", 8835,    
-       "sup1", 185,    "sup2", 178,    "sup3", 179,    "supe", 8839,   
-       "szlig", 223,   "tau", 964,     "there4", 8756, "theta", 952,   
-       "thetasym", 977,"thinsp", 8201, "thorn", 254,   "tilde", 732,   
-       "times", 215,   "trade", 8482,  "uArr", 8657,   "uacute", 250,  
-       "uarr", 8593,   "ucirc", 251,   "ugrave", 249,  "uml", 168,     
-       "upsih", 978,   "upsilon", 965, "uuml", 252,    "varepsilon", 8712,     
-       "varphi", 981,  "varpi", 982,   "varrho", 1009, "vdots", 8942,  
-       "vsigma", 962,  "vtheta", 977,  "weierp", 8472, "xi", 958,      
-       "yacute", 253,  "yen", 165,     "yuml", 255,    "zeta", 950,    
-       "zwj", 8205,    "zwnj", 8204,
-};
-
 Rune
 parserune(int c)
 {
        char buf[10];
-       int i, n;
+       int n;
        Rune r;
 
        n = 0;
        if(c == '&'){
                while((c = Bgetc(&in)) > 0){
-                       if(strchr("\n\r\t ;</>", c)){
+                       if(strchr(";&</>\n\r\t ", c)){
                                if(c != ';')
                                        Bungetc(&in);
                                if(n == 0)
@@ -391,15 +743,17 @@ parserune(int c)
                        buf[n++] = c;
                }
                buf[n] = 0;
-               if(buf[0] == '#')
-                       return atoi(buf+1);
-               for(i=0; i<nelem(entities); i++){
-                       n = strcmp(buf, entities[i].entity);
-                       if(n == 0)
-                               return entities[i].rune;
-                       if(n < 0)
-                               break;
-               }
+               if(strcmp(buf, "lt") == 0)
+                       return '<';
+               if(strcmp(buf, "gt") == 0)
+                       return '>';
+               if(strcmp(buf, "quot") == 0)
+                       return '"';
+               if(strcmp(buf, "apos") == 0)
+                       return '\'';
+               if(strcmp(buf, "amp") == 0)
+                       return '&';
+               /* use tcs -f html to handle the rest. */
        } else {
                do {
                        buf[n++] = c;
@@ -432,7 +786,11 @@ substrune(Rune r)
 void
 debugtag(Tag *tag, char *dbg)
 {
-       if(1) return;
+       if(1){
+               USED(tag);
+               USED(dbg);
+               return;
+       }
 
        if(tag == nil)
                return;
@@ -440,48 +798,115 @@ debugtag(Tag *tag, char *dbg)
        fprint(2, "%s %s%s", tag->tag, dbg ? dbg : " > ", dbg ? "\n" : "");
 }
 
+char*
+getattr(Tag *tag, char *attr)
+{
+       int i;
+
+       for(i=0; i<tag->nattr; i++)
+               if(cistrcmp(tag->attr[i].attr, attr) == 0)
+                       return tag->attr[i].val;
+       return nil;
+}
+
+int
+gotattr(Tag *tag, char *attr, char *val)
+{
+       char *v;
 
-Tag*
+       if((v = getattr(tag, attr)) == nil)
+               return 0;
+       return cistrstr(v, val) != 0;
+}
+
+int
+gotstyle(Tag *tag, char *style, char *val)
+{
+       char *v;
+
+       if((v = getattr(tag, "style")) == nil)
+               return 0;
+       if((v = cistrstr(v, style)) == nil)
+               return 0;
+       v += strlen(style);
+       while(*v && *v != ':')
+               v++;
+       if(*v != ':')
+               return 0;
+       v++;
+       while(*v && strchr("\t ", *v))
+               v++;
+       if(cistrncmp(v, val, strlen(val)))
+               return 0;
+       return 1;
+}
+
+void
+reparent(Text *text, Tag *tag, Tag *up)
+{
+       Tag *old;
+
+       old = tag->up;
+       while(old != up){
+               debugtag(old, "reparent");
+               if(old->close){
+                       old->close(text, old);
+                       old->close = nil;
+               }
+               old = old->up;
+       }
+       tag->up = up;
+}
+
+
+void
 parsetext(Text *text, Tag *tag)
 {
-       Tag *rtag;
+       int hidden, c;
+       Tag t, *up;
        Rune r;
-       int c;
 
-       rtag = tag;
-       debugtag(tag, "open");
+       if(tag){
+               up = tag->up;
+               debugtag(tag, "open");
+               for(c = 0; c < nelem(ontag); c++){
+                       if(cistrcmp(tag->tag, ontag[c].tag) == 0){
+                               ontag[c].open(text, tag);
+                               break;
+                       }
+               }
+               hidden = getattr(tag, "hidden") || gotstyle(tag, "display", "none");
+       } else {
+               up = nil;
+               hidden = 0;
+       }
        if(tag == nil || tag->closing == 0){
                while((c = Bgetc(&in)) > 0){
                        if(c == '<'){
-                               Tag t;
-
                                memset(&t, 0, sizeof(t));
                                if(parsetag(&t)){
                                        if(t.opening){
                                                t.up = tag;
-                                               for(c = 0; c < nelem(ontag); c++){
-                                                       if(cistrcmp(t.tag, ontag[c].tag) == 0){
-                                                               ontag[c].open(text, &t);
-                                                               break;
-                                                       }
-                                               }
-                                               rtag = parsetext(text, &t);
-                                               if(rtag == &t)
-                                                       rtag = tag;
-                                               else
+                                               parsetext(text, &t);
+                                               if(t.up != tag){
+                                                       debugtag(tag, "skip");
+                                                       up = t.up;
                                                        break;
+                                               }
+                                               debugtag(tag, "back");
                                        } else if(t.closing){
-                                               while(rtag && cistrcmp(rtag->tag, t.tag))
-                                                       rtag = rtag->up;
-                                               if(rtag == nil)
-                                                       rtag = tag;
-                                               else
+                                               up = tag;
+                                               while(up && cistrcmp(up->tag, t.tag))
+                                                       up = up->up;
+                                               if(up){
+                                                       up = up->up;
                                                        break;
+                                               }
                                        }
                                }
                                continue;
                        }
-                       if(!text->output)
+                       if(hidden || !text->output)
                                continue;
                        r = substrune(parserune(c));
                        switch(r){
@@ -491,50 +916,53 @@ parsetext(Text *text, Tag *tag)
                        case '\t':
                                if(text->pre == 0){
                                        text->space = 1;
-                                       continue;
+                                       break;
                                }
                        default:
-                               if(r == '\n' || r == '\r')
-                                       text->pos = 0;
                                if(text->space){
-                                       text->space = 0;
-                                       if(text->pos >= 70){
-                                               text->pos = 0;
-                                               Bputc(&out, '\n');
-                                       } else if(text->pos > 0){
-                                               text->pos++;
-                                               Bputc(&out, ' ');
-                                       }
+                                       if(text->pos >= 70)
+                                               emitrune(text, '\n');
+                                       else if(text->pos > 0)
+                                               emitrune(text, ' ');
                                }
-                               if(text->pos == 0 && r == '.'){
-                                       text->pos++;
-                                       Bputc(&out, ' ');
-                               }
-                               text->pos++;
-                               if(r == 0xA0){
+                               if((text->pos == 0 && r == '.') || r == '\\')
+                                       emit(text, "\\&");
+                               if(r == '\\' || r == 0xA0)
+                                       emitrune(text, '\\');
+                               if(r == 0xA0)
                                        r = ' ';
-                                       Bputc(&out, '\\');
-                               }
-                               Bprint(&out, "%C", r);
+                               emitrune(text, r);
+                               text->space = 0;
                        }
                }
        }
-       debugtag(tag, "close");
-       if(tag && tag->close)
-               tag->close(text, tag);
-       return rtag;
+       if(tag){
+               debugtag(tag, "close");
+               if(tag->close){
+                       tag->close(text, tag);
+                       tag->close = nil;
+               }
+               if(up)
+                       tag->up = up;
+       }
+}
+
+void
+inittext(Text *text)
+{
+       memset(text, 0, sizeof(Text));
+       text->fontstyle = "R";
+       text->fontsize = "NL";
+       text->output = 1;
 }
 
 void
 main(void)
 {
        Text text;
-
        Binit(&in, 0, OREAD);
-       Binit(&out, 1, OWRITE);
-
-       memset(&text, 0, sizeof(text));
-       text.output = 1;
+       inittext(&text);
        parsetext(&text, nil);
        emit(&text, "\n");
+       write(1, text.bp, text.wp - text.bp);
 }