void (*close)(Text *, Tag *);
union {
void *aux;
- int restore;
};
};
+/*
+ * Conversion state shared by all tag handlers.
+ * pre is set while inside <pre>; pos counts runes emitted since the
+ * last line break; space records white space seen but not yet emitted;
+ * output is cleared while text must be discarded (head, script, style).
+ */
struct Text {
- char* font;
+ /* current font name as given to \f ("R", "I", "B", "C") */
+ char* fontstyle;
+ /* current size request name ("NL", "SM", "LG") */
+ char* fontsize;
int pre;
int pos;
int space;
int output;
- int underline;
+
+ /* output buffer: bp is its start, wp the next byte to write, nb its allocated size */
+ char *bp;
+ char *wp;
+ int nb;
};
void eatwhite(void);
-Tag *parsetext(Text *, Tag *);
+void parsetext(Text *, Tag *);
int parsetag(Tag *);
int parseattr(Attr *);
+void flushtext(Text *);
+char* getattr(Tag *, char *);
+int gotattr(Tag *, char *, char *);
+int gotstyle(Tag *, char *, char *);
+void reparent(Text *, Tag *, Tag *);
+void debugtag(Tag *, char *);
+
+Biobuf in;
+
+/*
+ * Append nbuf bytes from buf to the output buffer of text.
+ * The buffer is grown with realloc when the free space is too
+ * small; running out of memory is fatal, since there is nowhere
+ * left to put the output.
+ */
+void
+emitbuf(Text *text, char *buf, int nbuf)
+{
+	int nw;
+
+	if(nbuf <= 0)
+		return;
+	nw = text->wp - text->bp;
+	if((text->nb - nw) < nbuf){
+		/* grow by at least 4096 bytes so single-rune appends rarely reallocate */
+		if(nbuf < 4096)
+			text->nb = nw + 4096;
+		else
+			text->nb = nw + nbuf;
+		text->bp = realloc(text->bp, text->nb);
+		if(text->bp == nil)
+			sysfatal("realloc: %r");
+		text->wp = text->bp + nw;
+	}
+	memmove(text->wp, buf, nbuf);
+	text->wp += nbuf;
+}
-Biobuf in, out;
+/*
+ * Append one rune, UTF-8 encoded, to the output and keep the
+ * column bookkeeping current: a line break resets pos and drops
+ * any pending space, every other rune advances pos by one.
+ */
+void
+emitrune(Text *text, Rune r)
+{
+	char utf[UTFmax+1];
+	int n;
+
+	switch(r){
+	case '\r':
+	case '\n':
+		text->pos = 0;
+		text->space = 0;
+		break;
+	default:
+		text->pos++;
+		break;
+	}
+	n = runetochar(utf, &r);
+	emitbuf(text, utf, n);
+}
+/*
+ * Format a print-style message and append it to the output rune by
+ * rune through emitrune, so pos and space stay in step.
+ * The formatted result is bounded by the size of buf (64 runes).
+ */
void
emit(Text *text, char *fmt, ...)
{
+ Rune buf[64];
va_list a;
+ int i;
- if(text->pos > 0){
- text->pos = 0;
- Bputc(&out, '\n');
- }
+ /* a format starting with '.' is started on a fresh line (presumably a troff request) */
+ if(fmt[0] == '.' && text->pos)
+ emitrune(text, '\n');
va_start(a, fmt);
- Bvprint(&out, fmt, a);
+ runevsnprint(buf, nelem(buf), fmt, a);
va_end(a);
+ for(i=0; buf[i]; i++)
+ emitrune(text, buf[i]);
}
+/* Close handler installed by ongarbage: turns text output back on. */
void
-restoreoutput(Text *text, Tag *tag)
+restoreoutput(Text *text, Tag *)
{
- text->output = tag->restore;
+ text->output = 1;
}
+/*
+ * Open handler for elements whose content must not reach the output
+ * (head, script and style in the ontag table): switches output off
+ * until the tag closes.
+ */
void
ongarbage(Text *text, Tag *tag)
{
- tag->restore = text->output;
+ /* already suppressed by an enclosing tag; let that tag restore output */
+ if(text->output == 0)
+ return;
tag->close = restoreoutput;
text->output = 0;
}
+/*
+ * Open handler for img, link and meta: marks the tag as closing,
+ * which makes parsetext skip reading any content for it.
+ */
+void
+onmeta(Text *, Tag *tag)
+{
+ tag->closing = 1;
+}
+
void
onp(Text *text, Tag *)
{
}
+/* Close handler installed by onpre: leaves preformatted mode and emits .DE. */
void
-restorepre(Text *text, Tag *tag)
+restorepre(Text *text, Tag *)
{
- text->pre = tag->restore;
+ text->pre = 0;
emit(text, ".DE\n");
}
void
onpre(Text *text, Tag *tag)
{
- tag->restore = text->pre;
+ if(text->pre)
+ return;
tag->close = restorepre;
text->pre = 1;
emit(text, ".DS L\n");
+/* Open handler for h1..h6: emits .SH and installs onp as the close handler. */
void
onh(Text *text, Tag *tag)
{
- emit(text, ".SH %c\n", tag->tag[1]);
+ emit(text, ".SH\n");
tag->close = onp;
}
}
+/*
+ * Switch the current font to style, emitting a \f escape only
+ * when the font actually changes.
+ */
void
-restorefont(Text *text, Tag *tag)
+fontstyle(Text *text, char *style)
+{
+	if(strcmp(text->fontstyle, style) != 0){
+		text->fontstyle = style;
+		emit(text, "\\f%s", style);
+	}
+}
+
+/*
+ * Switch the current size to size, emitting the request of that
+ * name on its own line only when the size actually changes.
+ */
+void
+fontsize(Text *text, char *size)
+{
+	if(strcmp(text->fontsize, size) != 0){
+		text->fontsize = size;
+		emit(text, ".%s\n", size);
+	}
+}
+
+/* Close handler: switches back to the font name saved in tag->aux by the open handler. */
+void
+restorefontstyle(Text *text, Tag *tag)
+{
+ fontstyle(text, tag->aux);
+}
+
+/* Close handler: switches back to the size name saved in tag->aux by the open handler. */
+void
+restorefontsize(Text *text, Tag *tag)
+{
+ fontsize(text, tag->aux);
+}
+
+/* Open handler for italic text: remembers the current font in the tag, then selects "I". */
+void
+oni(Text *text, Tag *tag)
+{
+	tag->close = restorefontstyle;
+	tag->aux = text->fontstyle;	/* saved before fontstyle overwrites it */
+	fontstyle(text, "I");
+}
+
+/* Open handler for bold text: remembers the current font in the tag, then selects "B". */
+void
+onb(Text *text, Tag *tag)
+{
+	tag->close = restorefontstyle;
+	tag->aux = text->fontstyle;	/* saved before fontstyle overwrites it */
+	fontstyle(text, "B");
+}
+
+void onsmall(Text *text, Tag *tag);
+void onsup(Text *text, Tag *tag);
+
+/*
+ * Move the baseline down by half a line.  As the open handler of
+ * a "sub" tag it also adds extra line space and switches to the
+ * small size; for any other tag (it is installed as the close
+ * handler by onsup) it restores the saved size instead.
+ */
+void
+onsub(Text *text, Tag *tag)
+{
+	int issub;
+
+	emit(text, "\\v\'0.5\'");
+	issub = cistrcmp(tag->tag, "sub") == 0;
+	if(!issub)
+		restorefontsize(text, tag);
+	else{
+		emit(text, "\\x\'0.5\'");
+		onsmall(text, tag);
+	}
+	/* onsmall set its own close handler; the reverse motion must win */
+	tag->close = onsup;
+}
+
+/*
+ * Move the baseline up by half a line.  As the open handler of
+ * a "sup" tag it also adds extra line space and switches to the
+ * small size; for any other tag (it is installed as the close
+ * handler by onsub) it restores the saved size instead.
+ */
+void
+onsup(Text *text, Tag *tag)
+{
+	int issup;
+
+	emit(text, "\\v\'-0.5\'");
+	issup = cistrcmp(tag->tag, "sup") == 0;
+	if(!issup)
+		restorefontsize(text, tag);
+	else{
+		emit(text, "\\x\'-0.5\'");
+		onsmall(text, tag);
+	}
+	/* onsmall set its own close handler; the reverse motion must win */
+	tag->close = onsub;
+}
+
+/*
+ * A poor man's CSS handler: the first class attribute of a span
+ * whose value is bold, italic, subscript or superscript is
+ * mapped onto the handler of the equivalent tag.
+ */
+void
+onspan(Text *text, Tag *tag)
+{
+	char *val;
+	int i;
+
+	if(!tag->opening)
+		return;
+
+	for(i = 0; i < tag->nattr; i++){
+		if(cistrcmp(tag->attr[i].attr, "class") != 0)
+			continue;
+		val = tag->attr[i].val;
+
+		if(cistrcmp(val, "bold") == 0){
+			onb(text, tag);
+			return;
+		}
+		if(cistrcmp(val, "italic") == 0){
+			oni(text, tag);
+			return;
+		}
+		/* onsub and onsup look at the tag name, so borrow it for the call */
+		if(cistrcmp(val, "subscript") == 0){
+			strcpy(tag->tag, "sub");
+			onsub(text, tag);
+			strcpy(tag->tag, "span");
+			return;
+		}
+		if(cistrcmp(val, "superscript") == 0){
+			strcpy(tag->tag, "sup");
+			onsup(text, tag);
+			strcpy(tag->tag, "span");
+			return;
+		}
+	}
+}
+
+/* Open handler for typewriter text: remembers the current font in the tag, then selects "C". */
+void
+ontt(Text *text, Tag *tag)
+{
+	tag->close = restorefontstyle;
+	tag->aux = text->fontstyle;	/* saved before fontstyle overwrites it */
+	fontstyle(text, "C");
+}
+
+/* Open handler for small text: remembers the current size in the tag, then selects "SM". */
+void
+onsmall(Text *text, Tag *tag)
+{
+	tag->close = restorefontsize;
+	tag->aux = text->fontsize;	/* saved before fontsize overwrites it */
+	fontsize(text, "SM");
+}
+
+/* Open handler for big text: remembers the current size in the tag, then selects "LG". */
+void
+onbig(Text *text, Tag *tag)
+{
+	tag->close = restorefontsize;
+	tag->aux = text->fontsize;	/* saved before fontsize overwrites it */
+	fontsize(text, "LG");
+}
+
+/*
+ * Close handler installed by onquote: a "q" tag gets its closing
+ * double quote, then the quotation is ended with .QE.
+ */
+void
+endquote(Text *text, Tag *tag)
+{
+	int isq;
+
+	isq = cistrcmp(tag->tag, "q") == 0;
+	if(isq)
+		emitrune(text, '"');
+	emit(text, ".QE\n");
+}
+
+/*
+ * Open handler for q and blockquote: a "q" tag emits .QS followed
+ * by an opening double quote, any other tag emits .QP.
+ */
+void
+onquote(Text *text, Tag *tag)
+{
+	if(cistrcmp(tag->tag, "q") != 0)
+		emit(text, ".QP\n");
+	else
+		emit(text, ".QS\n\"");
+	tag->close = endquote;
+}
+
+/*
+ * One table cell.  The cells of a table hang off the aux field of
+ * its <table> tag as a list linked newest-first through prev;
+ * endtable builds the next links in document order.
+ */
+typedef struct Table Table;
+struct Table
+{
+ /* cell content and its length in bytes, taken over from the Text buffer by endcell */
+ char *bp;
+ int nb;
+
+ Table *next;
+ Table *prev;
+ /* nonzero: dumprows wraps the content in T{ ... T} */
+ int enclose;
+ /* nonzero: last cell of its row, followed by a newline instead of a tab */
+ int brk;
+
+ /* format written for this cell in the .TS header: "L" or "C", optionally followed by "B" */
+ char fmt[4];
+
+ /* Text state saved by oncell and put back by endcell */
+ Text save;
+};
+
+/*
+ * Return the nearest tag named "table", starting with tag itself
+ * and walking up the parent chain; nil if there is none.
+ */
+Tag*
+tabletag(Tag *tag)
+{
+	for(; tag != nil; tag = tag->up)
+		if(cistrcmp(tag->tag, "table") == 0)
+			return tag;
+	return nil;
+}
+
+/*
+ * Emit the cells from s up to, but not including, e.  Empty cells
+ * become "\ ", enclosed cells are wrapped in T{ ... T}, and every
+ * cell is followed by a tab, or by a newline when it ends a row.
+ */
+void
+dumprows(Text *text, Table *s, Table *e)
+{
+	Table *c;
+
+	for(c = s; c != e; c = c->next){
+		if(c->enclose)
+			emit(text, "T{\n");
+		if(c->nb > 0)
+			emitbuf(text, c->bp, c->nb);
+		else
+			emit(text, "\\ ");
+		if(c->enclose)
+			emit(text, "\nT}");
+		if(c->brk)
+			emitrune(text, '\n');
+		else
+			emitrune(text, '\t');
+	}
+}
+
+/*
+ * Close handler for <table> (installed by ontable).  The cells
+ * collected in tag->aux are put into document order and written
+ * out as one or more .TS/.TE blocks, then freed.  A table nested
+ * in another table hands its cells to the enclosing table instead.
+ */
+void
+endtable(Text *text, Tag *tag)
{
- text->font = tag->aux;
- text->pos += Bprint(&out, "\\f%s", text->font);
+	int i, cols;
+	Table *t, *h, *s;
+	Tag *tt;
+
+	/* cells were pushed newest-first through prev; build the forward list */
+	h = nil;
+	t = tag->aux;
+	for(; t; t = t->prev){
+		t->next = h;
+		h = t;
+	}
+
+	/*
+	 * nested table case, add our cells to the next table up.
+	 * this is the best we can do, tbl doesn't support nesting
+	 */
+	if(tt = tabletag(tag->up)){
+		while(t = h){
+			h = h->next;
+			t->next = nil;
+			t->prev = tt->aux;
+			tt->aux = t;
+		}
+		return;
+	}
+
+	/* cols = number of cells in the widest row */
+	cols = 0;
+	for(i = 0, t = h; t; t = t->next){
+		i++;
+		if(t->brk){
+			if(i > cols)
+				cols = i;
+			i = 0;
+		}
+	}
+
+	/*
+	 * pad the first row with empty left-aligned cells up to cols.
+	 * each new cell is linked in right after t and takes over its
+	 * brk flag, so the row break ends up on the last padding cell.
+	 */
+	i = 0;
+	for(t = h; t; t = t->next){
+		i++;
+		if(t->brk){
+			while(i < cols){
+				s = mallocz(sizeof(Table), 1);
+				strcpy(s->fmt, "L");
+				s->brk = t->brk;
+				t->brk = 0;
+				s->next = t->next;
+				t->next = s;
+				i++;
+			}
+			break;
+		}
+	}
+
+	/* write the rows in chunks, each chunk as its own .TS/.TE block */
+	s = h;
+	while(s){
+		emit(text, ".TS\n");
+		if(gotattr(tag, "align", "center"))
+			emit(text, "center ;\n");
+		/* format section: one line per row, ending the chunk after 31 rows */
+		i = 0;
+		for(t = s; t; t = t->next){
+			emit(text, "%s", t->fmt);
+			if(t->brk){
+				emitrune(text, '\n');
+				if(++i > 30){
+					t = t->next;
+					break;
+				}
+			}else
+				emitrune(text, ' ');
+		}
+		emit(text, ".\n");
+		dumprows(text, s, t);
+		emit(text, ".TE\n");
+		s = t;
+	}
+
+	while(t = h){
+		h = t->next;
+		free(t->bp);
+		free(t);
+	}
}
+/* Open handler for <table>: starts with an empty cell list and installs endtable as close handler. */
void
-onfont(Text *text, Tag *tag)
+ontable(Text *, Tag *tag)
{
- if(text->font == 0)
- text->font = "R";
- tag->aux = text->font;
- tag->close = restorefont;
- if(cistrcmp(tag->tag, "i") == 0)
- text->font = "I";
- else if(cistrcmp(tag->tag, "b") == 0)
- text->font = "B";
- text->pos += Bprint(&out, "\\f%s", text->font);
+ tag->aux = nil;
+ tag->close = endtable;
}
+/*
+ * Close handler for tr, td and th (installed by oncell).
+ * Closing a row marks the most recently collected cell as the end
+ * of its row.  Closing a cell takes the text emitted since oncell
+ * as the cell content, trims it, chooses its tbl format, pushes it
+ * onto the cell list of the enclosing table and puts back the Text
+ * state saved by oncell.
+ */
void
-ona(Text *text, Tag *)
+endcell(Text *text, Tag *tag)
{
- text->underline = 1;
+	Table *t;
+	Tag *tt;
+	int i;
+
+	if((tt = tabletag(tag)) == nil)
+		return;
+	if(cistrcmp(tag->tag, "tr") == 0){
+		if(t = tt->aux)
+			t->brk = 1;
+	} else {
+		t = tag->aux;
+		t->bp = text->bp;
+		t->nb = text->wp - text->bp;
+
+		/* trim leading and trailing white space */
+		for(i=0; i<t->nb; i++)
+			if(strchr(" \t\r\n", t->bp[i]) == nil)
+				break;
+		if(i > 0){
+			memmove(t->bp, t->bp+i, t->nb - i);
+			t->nb -= i;
+		}
+		while(t->nb > 0 && strchr(" \t\r\n", t->bp[t->nb-1]))
+			t->nb--;
+
+		/* short content is only enclosed when it contains a tab or line break */
+		if(t->nb < 32){
+			for(i=0; i<t->nb; i++)
+				if(strchr("\t\r\n", t->bp[i]))
+					break;
+			t->enclose = i < t->nb;
+		} else {
+			t->enclose = 1;
+		}
+		if(gotstyle(tag, "text-align", "center") || gotstyle(tt, "text-align", "center"))
+			strcpy(t->fmt, "C");
+		else
+			strcpy(t->fmt, "L");
+		/* compare without regard to case, like every other tag name test here */
+		if(cistrcmp(tag->tag, "th") == 0)
+			strcpy(t->fmt+1, "B");
+		t->prev = tt->aux;
+		tt->aux = t;
+		*text = t->save;
+	}
+}
+
+/*
+ * Open handler for tr, td and th.  Outside of a table it does
+ * nothing.  A row is reparented to its table.  A cell is
+ * reparented to its nearest enclosing row (a cell without a row is
+ * ignored); the current Text state is saved in a new Table entry
+ * and output is redirected to a fresh, empty buffer until endcell.
+ */
+void
+oncell(Text *text, Tag *tag)
+{
+	Table *cell;
+	Tag *up;
+
+	up = tabletag(tag);
+	if(up == nil)
+		return;
+	if(cistrcmp(tag->tag, "tr") == 0){
+		reparent(text, tag, up);
+		tag->close = endcell;
+		return;
+	}
+
+	for(up = tag->up; up != nil; up = up->up)
+		if(cistrcmp(up->tag, "tr") == 0)
+			break;
+	if(up == nil)
+		return;
+	reparent(text, tag, up);
+
+	cell = mallocz(sizeof(*cell), 1);
+	cell->save = *text;
+	tag->aux = cell;
+
+	text->bp = nil;
+	text->wp = nil;
+	text->nb = 0;
+	text->pos = 0;
+	text->space = 0;
+
+	tag->close = endcell;
}
+/*
+ * Open handlers by tag name.  parsetext searches the table from
+ * the top, comparing names without regard to case, and calls the
+ * first match; tags not listed here get no handler.
+ */
struct {
char *tag;
void (*open)(Text *, Tag *);
} ontag[] = {
- "a", ona,
+ "b", onb,
+ "big", onbig,
+ "blockquote", onquote,
"br", onbr,
- "hr", onbr,
- "b", onfont,
- "i", onfont,
- "p", onp,
+ "cite", oni,
+ "code", ontt,
+ "dfn", oni,
+ "em", oni,
"h1", onh,
"h2", onh,
"h3", onh,
"h4", onh,
"h5", onh,
"h6", onh,
+ "head", ongarbage,
+ "hr", onbr,
+ "i", oni,
+ "img", onmeta,
+ "kbd", ontt,
"li", onli,
+ "link", onmeta,
+ "meta", onmeta,
+ "p", onp,
"pre", onpre,
- "head", ongarbage,
- "style", ongarbage,
+ "q", onquote,
+ "samp", ontt,
"script", ongarbage,
+ "small", onsmall,
+ "strong", onb,
+ "style", ongarbage,
+ "table", ontable,
+ "td", oncell,
+ "th", oncell,
+ "tr", oncell,
+ "sub", onsub,
+ "sup", onsup,
+ "span", onspan,
+ "tt", ontt,
+ "var", oni,
};
void
if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
continue;
while((c = Bgetc(&in)) > 0){
- if(c == ']')
- if(Bgetc(&in) == ']')
- if(Bgetc(&in) == '>')
- return;
+ if(c == ']'){
+ if(Bgetc(&in) == ']'){
+ if(Bgetc(&in) != '>')
+ Bungetc(&in);
+ return;
+ }
+ }
}
}
}
return '>';
if(strcmp(buf, "quot") == 0)
return '"';
+ if(strcmp(buf, "apos") == 0)
+ return '\'';
if(strcmp(buf, "amp") == 0)
return '&';
/* use tcs -f html to handle the rest. */
+/*
+ * Debugging aid: prints the tag name and dbg on file descriptor 2.
+ * Currently disabled by the unconditional return at the top; the
+ * USED calls mark the parameters as used while it is disabled.
+ */
void
debugtag(Tag *tag, char *dbg)
{
- if(1) return;
+ if(1){
+ USED(tag);
+ USED(dbg);
+ return;
+ }
if(tag == nil)
return;
fprint(2, "%s %s%s", tag->tag, dbg ? dbg : " > ", dbg ? "\n" : "");
}
+/*
+ * Return the value of the first attribute of tag whose name matches
+ * attr without regard to case, or nil if tag has no such attribute.
+ */
+char*
+getattr(Tag *tag, char *attr)
+{
+ int i;
-Tag*
+ for(i=0; i<tag->nattr; i++)
+ if(cistrcmp(tag->attr[i].attr, attr) == 0)
+ return tag->attr[i].val;
+ return nil;
+}
+
+/*
+ * Report whether tag has attribute attr and cistrstr finds val
+ * somewhere in its value.  Returns 1 if so, 0 otherwise.
+ */
+int
+gotattr(Tag *tag, char *attr, char *val)
+{
+	char *have;
+
+	have = getattr(tag, attr);
+	return have != nil && cistrstr(have, val) != nil;
+}
+
+/*
+ * Report whether the style attribute of tag sets property style
+ * to a value starting with val.  The property name is located
+ * with cistrstr, the next ':' after it is taken as the separator,
+ * blanks and tabs after the ':' are skipped, and the value is
+ * compared without regard to case.  Returns 1 on a match, else 0.
+ */
+int
+gotstyle(Tag *tag, char *style, char *val)
+{
+	char *p;
+
+	p = getattr(tag, "style");
+	if(p == nil)
+		return 0;
+	p = cistrstr(p, style);
+	if(p == nil)
+		return 0;
+	p = strchr(p + strlen(style), ':');
+	if(p == nil)
+		return 0;
+	p++;
+	while(*p == ' ' || *p == '\t')
+		p++;
+	return cistrncmp(p, val, strlen(val)) == 0;
+}
+
+/*
+ * Make up the parent of tag.  Every tag between tag and up on the
+ * old parent chain is closed on the way: its close handler, if
+ * any, is run once and then cleared.
+ */
+void
+reparent(Text *text, Tag *tag, Tag *up)
+{
+	Tag *t;
+
+	for(t = tag->up; t != up; t = t->up){
+		debugtag(t, "reparent");
+		if(t->close == nil)
+			continue;
+		t->close(text, t);
+		t->close = nil;
+	}
+	tag->up = up;
+}
+
+
+/*
+ * Parse the content of tag (nil for the top level of the document).
+ * For a non-nil tag its open handler from ontag is run first, and
+ * its text is treated as hidden when it has a hidden attribute or
+ * a display:none style.  Unless tag->closing is set, input is then
+ * read until Bgetc stops returning a positive value or a closing
+ * tag names this tag or one of its ancestors; opening tags are
+ * handled by recursion.  Finally the close handler of tag is run
+ * and cleared.  When the closing tag belonged to an ancestor,
+ * tag->up is changed on return so the caller sees that it has to
+ * stop as well.
+ */
+void
parsetext(Text *text, Tag *tag)
{
- Tag *rtag;
+ int hidden, c;
+ Tag t, *up;
Rune r;
- int c;
- rtag = tag;
- debugtag(tag, "open");
+ if(tag){
+ up = tag->up;
+ debugtag(tag, "open");
+ for(c = 0; c < nelem(ontag); c++){
+ if(cistrcmp(tag->tag, ontag[c].tag) == 0){
+ ontag[c].open(text, tag);
+ break;
+ }
+ }
+ hidden = getattr(tag, "hidden") || gotstyle(tag, "display", "none");
+ } else {
+ up = nil;
+ hidden = 0;
+ }
if(tag == nil || tag->closing == 0){
while((c = Bgetc(&in)) > 0){
if(c == '<'){
- Tag t;
-
memset(&t, 0, sizeof(t));
if(parsetag(&t)){
if(t.opening){
t.up = tag;
- for(c = 0; c < nelem(ontag); c++){
- if(cistrcmp(t.tag, ontag[c].tag) == 0){
- ontag[c].open(text, &t);
- break;
- }
- }
- rtag = parsetext(text, &t);
- if(rtag == &t)
- rtag = tag;
- else
+ parsetext(text, &t);
+ /* the child was ended by a closing tag of one of our ancestors (or of us) */
+ if(t.up != tag){
+ debugtag(tag, "skip");
+ up = t.up;
break;
+ }
+ debugtag(tag, "back");
} else if(t.closing){
- while(rtag && cistrcmp(rtag->tag, t.tag))
- rtag = rtag->up;
- if(rtag == nil)
- rtag = tag;
- else
+ /* find the open tag this closes; a closing tag that matches nothing is ignored */
+ up = tag;
+ while(up && cistrcmp(up->tag, t.tag))
+ up = up->up;
+ if(up){
+ up = up->up;
break;
+ }
}
}
continue;
}
- if(!text->output)
+ if(hidden || !text->output)
continue;
r = substrune(parserune(c));
switch(r){
case '\t':
if(text->pre == 0){
text->space = 1;
- continue;
+ break;
}
default:
- if(r == '\n' || r == '\r')
- text->pos = 0;
if(text->space){
- text->space = 0;
- if(text->underline){
- emit(text, "");
- text->pos = Bprint(&out, ".UL ");
- } else if(text->pos >= 70){
- text->pos = 0;
- Bputc(&out, '\n');
- } else if(text->pos > 0){
- text->pos++;
- Bputc(&out, ' ');
- }
- }
- if(text->pos == 0 && r == '.'){
- text->pos++;
- Bputc(&out, ' ');
+ /* pending white space: break the line at 70 or more runes, else emit one blank */
+ if(text->pos >= 70)
+ emitrune(text, '\n');
+ else if(text->pos > 0)
+ emitrune(text, ' ');
}
- text->pos++;
- if(r == 0xA0){
+ /* a '.' at the start of a line and any backslash are prefixed with \& */
+ if((text->pos == 0 && r == '.') || r == '\\')
+ emit(text, "\\&");
+ /* a backslash is doubled; 0xA0 is written as backslash followed by a blank */
+ if(r == '\\' || r == 0xA0)
+ emitrune(text, '\\');
+ if(r == 0xA0)
r = ' ';
- Bputc(&out, '\\');
- }
- Bprint(&out, "%C", r);
+ emitrune(text, r);
+ text->space = 0;
}
}
}
- debugtag(tag, "close");
- if(tag && tag->close)
- tag->close(text, tag);
- return rtag;
+ if(tag){
+ debugtag(tag, "close");
+ if(tag->close){
+ tag->close(text, tag);
+ tag->close = nil;
+ }
+ if(up)
+ tag->up = up;
+ }
+}
+
+/*
+ * Put text into its start state: everything zeroed (no output
+ * buffer yet), font "R", size "NL", output enabled.
+ */
+void
+inittext(Text *text)
+{
+	memset(text, 0, sizeof(*text));
+	text->output = 1;
+	text->fontsize = "NL";
+	text->fontstyle = "R";
}
+/*
+ * Convert the document on file descriptor 0: the whole output is
+ * collected in the Text buffer and written to file descriptor 1
+ * in one piece at the end.  A failed or short write is fatal, so
+ * truncated output does not pass silently.
+ */
void
main(void)
{
Text text;
+ long n;
-
Binit(&in, 0, OREAD);
- Binit(&out, 1, OWRITE);
-
- memset(&text, 0, sizeof(text));
- text.output = 1;
+ inittext(&text);
parsetext(&text, nil);
emit(&text, "\n");
+ n = text.wp - text.bp;
+ if(write(1, text.bp, n) != n)
+ sysfatal("write: %r");
}