int pos;
int space;
int output;
+
+ char *bp;
+ char *wp;
+ int nb;
};
void eatwhite(void);
-Tag *parsetext(Text *, Tag *);
+void parsetext(Text *, Tag *);
int parsetag(Tag *);
int parseattr(Attr *);
+void flushtext(Text *);
+char* getattr(Tag *, char *);
+int gotattr(Tag *, char *, char *);
+int gotstyle(Tag *, char *, char *);
+void reparent(Text *, Tag *, Tag *);
+void debugtag(Tag *, char *);
+
+Biobuf in;
-Biobuf in, out;
+void
+emitbuf(Text *text, char *buf, int nbuf)
+{
+ int nw;
+
+ nw = text->wp - text->bp;
+ if((text->nb - nw) < nbuf){
+ if(nbuf < 4096)
+ text->nb = nw + 4096;
+ else
+ text->nb = nw + nbuf;
+ text->bp = realloc(text->bp, text->nb);
+ text->wp = text->bp + nw;
+ }
+ memmove(text->wp, buf, nbuf);
+ text->wp += nbuf;
+}
void
emitrune(Text *text, Rune r)
{
- if(r == '\r' || r =='\n')
+ char buf[UTFmax+1];
+
+ if(r == '\r' || r =='\n'){
text->pos = 0;
- else
+ text->space = 0;
+ }else
text->pos++;
- Bputrune(&out, r);
+ emitbuf(text, buf, runetochar(buf, &r));
}
void
va_list a;
int i;
- if(fmt[0] == '.' && text->pos){
+ if(fmt[0] == '.' && text->pos)
emitrune(text, '\n');
- text->space = 0;
- }
va_start(a, fmt);
runevsnprint(buf, nelem(buf), fmt, a);
va_end(a);
text->output = 0;
}
+void
+onmeta(Text *, Tag *tag)
+{
+ tag->closing = 1;
+}
+
void
onp(Text *text, Tag *)
{
fontstyle(text, "B");
}
+void onsmall(Text *text, Tag *tag);
+void onsup(Text *text, Tag *tag);
+
+void
+onsub(Text *text, Tag *tag)
+{
+ emit(text, "\\v\'0.5\'");
+ if(cistrcmp(tag->tag, "sub") == 0){
+ emit(text, "\\x\'0.5\'");
+ onsmall(text, tag);
+ } else
+ restorefontsize(text, tag);
+ tag->close = onsup;
+}
+
+void
+onsup(Text *text, Tag *tag)
+{
+ emit(text, "\\v\'-0.5\'");
+ if(cistrcmp(tag->tag, "sup") == 0){
+ emit(text, "\\x\'-0.5\'");
+ onsmall(text, tag);
+ }else
+ restorefontsize(text, tag);
+ tag->close = onsub;
+}
+
+/*
+ * this is poor mans CSS handler.
+ */
+void
+onspan(Text *text, Tag *tag)
+{
+ Attr *a;
+
+ if(!tag->opening)
+ return;
+
+ for(a=tag->attr; a < tag->attr+tag->nattr; a++){
+ if(cistrcmp(a->attr, "class") != 0)
+ continue;
+
+ if(cistrcmp(a->val, "bold") == 0){
+ onb(text, tag);
+ return;
+ }
+ if(cistrcmp(a->val, "italic") == 0){
+ oni(text, tag);
+ return;
+ }
+ if(cistrcmp(a->val, "subscript") == 0){
+ strcpy(tag->tag, "sub");
+ onsub(text, tag);
+ strcpy(tag->tag, "span");
+ return;
+ }
+ if(cistrcmp(a->val, "superscript") == 0){
+ strcpy(tag->tag, "sup");
+ onsup(text, tag);
+ strcpy(tag->tag, "span");
+ return;
+ }
+ }
+}
+
void
ontt(Text *text, Tag *tag)
{
- tag->aux = text->fontsize;
- tag->close = restorefontsize;
- fontsize(text, "CW");
+ tag->aux = text->fontstyle;
+ tag->close = restorefontstyle;
+ fontstyle(text, "C");
}
void
emit(text, ".QP\n");
}
+typedef struct Table Table;
+struct Table
+{
+ char *bp;
+ int nb;
+
+ Table *next;
+ Table *prev;
+ int enclose;
+ int brk;
+
+ char fmt[4];
+
+ Text save;
+};
+
+Tag*
+tabletag(Tag *tag)
+{
+ if(tag == nil)
+ return nil;
+ if(cistrcmp(tag->tag, "table") == 0)
+ return tag;
+ return tabletag(tag->up);
+}
+
+void
+dumprows(Text *text, Table *s, Table *e)
+{
+
+ for(; s != e; s = s->next){
+ if(s->enclose)
+ emit(text, "T{\n");
+ if(s->nb <= 0)
+ emit(text, "\\ ");
+ else
+ emitbuf(text, s->bp, s->nb);
+ if(s->enclose)
+ emit(text, "\nT}");
+ emitrune(text, s->brk ? '\n' : '\t');
+ }
+}
+
+void
+endtable(Text *text, Tag *tag)
+{
+ int i, cols, rows;
+ Table *t, *h, *s;
+ Tag *tt;
+
+ /* reverse list */
+ h = nil;
+ t = tag->aux;
+ for(; t; t = t->prev){
+ t->next = h;
+ h = t;
+ }
+
+ /*
+ * nested table case, add our cells to the next table up.
+ * this is the best we can do, tbl doesnt support nesting
+ */
+ if(tt = tabletag(tag->up)){
+ while(t = h){
+ h = h->next;
+ t->next = nil;
+ t->prev = tt->aux;
+ tt->aux = t;
+ }
+ return;
+ }
+
+ cols = 0;
+ rows = 0;
+ for(i = 0, t = h; t; t = t->next){
+ i++;
+ if(t->brk){
+ rows++;
+ if(i > cols)
+ cols = i;
+ i = 0;
+ }
+ }
+
+ i = 0;
+ for(t = h; t; t = t->next){
+ i++;
+ if(t->brk){
+ while(i < cols){
+ s = mallocz(sizeof(Table), 1);
+ strcpy(s->fmt, "L");
+ s->brk = t->brk;
+ t->brk = 0;
+ s->next = t->next;
+ t->next = s;
+ i++;
+ }
+ break;
+ }
+ }
+
+ s = h;
+ while(s){
+ emit(text, ".TS\n");
+ if(gotattr(tag, "align", "center"))
+ emit(text, "center ;\n");
+ i = 0;
+ for(t = s; t; t = t->next){
+ emit(text, "%s", t->fmt);
+ if(t->brk){
+ emitrune(text, '\n');
+ if(++i > 30){
+ t = t->next;
+ break;
+ }
+ }else
+ emitrune(text, ' ');
+ }
+ emit(text, ".\n");
+ dumprows(text, s, t);
+ emit(text, ".TE\n");
+ s = t;
+ }
+
+ while(t = h){
+ h = t->next;
+ free(t->bp);
+ free(t);
+ }
+}
+
+void
+ontable(Text *, Tag *tag)
+{
+ tag->aux = nil;
+ tag->close = endtable;
+}
+
+void
+endcell(Text *text, Tag *tag)
+{
+ Table *t;
+ Tag *tt;
+ int i;
+
+ if((tt = tabletag(tag)) == nil)
+ return;
+ if(cistrcmp(tag->tag, "tr") == 0){
+ if(t = tt->aux)
+ t->brk = 1;
+ } else {
+ t = tag->aux;
+ t->bp = text->bp;
+ t->nb = text->wp - text->bp;
+
+ for(i=0; i<t->nb; i++)
+ if(strchr(" \t\r\n", t->bp[i]) == nil)
+ break;
+ if(i > 0){
+ memmove(t->bp, t->bp+i, t->nb - i);
+ t->nb -= i;
+ }
+ while(t->nb > 0 && strchr(" \t\r\n", t->bp[t->nb-1]))
+ t->nb--;
+ if(t->nb < 32){
+ for(i=0; i<t->nb; i++)
+ if(strchr("\t\r\n", t->bp[i]))
+ break;
+ t->enclose = i < t->nb;
+ } else {
+ t->enclose = 1;
+ }
+ if(gotstyle(tag, "text-align", "center") || gotstyle(tt, "text-align", "center"))
+ strcpy(t->fmt, "C");
+ else
+ strcpy(t->fmt, "L");
+ if(strcmp(tag->tag, "th") == 0)
+ strcpy(t->fmt+1, "B");
+ t->prev = tt->aux;
+ tt->aux = t;
+ *text = t->save;
+ }
+}
+
+void
+oncell(Text *text, Tag *tag)
+{
+ Tag *tt;
+
+ if((tt = tabletag(tag)) == nil)
+ return;
+ if(cistrcmp(tag->tag, "tr")){
+ Table *t;
+
+ tt = tag->up;
+ while(tt && cistrcmp(tt->tag, "tr"))
+ tt = tt->up;
+ if(tt == nil)
+ return;
+ reparent(text, tag, tt);
+
+ t = mallocz(sizeof(*t), 1);
+ t->save = *text;
+ tag->aux = t;
+
+ text->bp = nil;
+ text->wp = nil;
+ text->nb = 0;
+ text->pos = 0;
+ text->space = 0;
+ } else
+ reparent(text, tag, tt);
+ tag->close = endcell;
+}
+
struct {
char *tag;
void (*open)(Text *, Tag *);
"head", ongarbage,
"hr", onbr,
"i", oni,
+ "img", onmeta,
"kbd", ontt,
"li", onli,
+ "link", onmeta,
+ "meta", onmeta,
"p", onp,
"pre", onpre,
"q", onquote,
"small", onsmall,
"strong", onb,
"style", ongarbage,
+ "table", ontable,
+ "td", oncell,
+ "th", oncell,
+ "tr", oncell,
+ "sub", onsub,
+ "sup", onsup,
+ "span", onspan,
"tt", ontt,
"var", oni,
};
return '>';
if(strcmp(buf, "quot") == 0)
return '"';
+ if(strcmp(buf, "apos") == 0)
+ return '\'';
if(strcmp(buf, "amp") == 0)
return '&';
/* use tcs -f html to handle the rest. */
void
debugtag(Tag *tag, char *dbg)
{
- if(1) return;
+ if(1){
+ USED(tag);
+ USED(dbg);
+ return;
+ }
if(tag == nil)
return;
fprint(2, "%s %s%s", tag->tag, dbg ? dbg : " > ", dbg ? "\n" : "");
}
+char*
+getattr(Tag *tag, char *attr)
+{
+ int i;
+
+ for(i=0; i<tag->nattr; i++)
+ if(cistrcmp(tag->attr[i].attr, attr) == 0)
+ return tag->attr[i].val;
+ return nil;
+}
-Tag*
+int
+gotattr(Tag *tag, char *attr, char *val)
+{
+ char *v;
+
+ if((v = getattr(tag, attr)) == nil)
+ return 0;
+ return cistrstr(v, val) != 0;
+}
+
+int
+gotstyle(Tag *tag, char *style, char *val)
+{
+ char *v;
+
+ if((v = getattr(tag, "style")) == nil)
+ return 0;
+ if((v = cistrstr(v, style)) == nil)
+ return 0;
+ v += strlen(style);
+ while(*v && *v != ':')
+ v++;
+ if(*v != ':')
+ return 0;
+ v++;
+ while(*v && strchr("\t ", *v))
+ v++;
+ if(cistrncmp(v, val, strlen(val)))
+ return 0;
+ return 1;
+}
+
+void
+reparent(Text *text, Tag *tag, Tag *up)
+{
+ Tag *old;
+
+ old = tag->up;
+ while(old != up){
+ debugtag(old, "reparent");
+ if(old->close){
+ old->close(text, old);
+ old->close = nil;
+ }
+ old = old->up;
+ }
+ tag->up = up;
+}
+
+
+void
parsetext(Text *text, Tag *tag)
{
- Tag *rtag;
+ int hidden, c;
+ Tag t, *up;
Rune r;
- int c;
- rtag = tag;
- debugtag(tag, "open");
+ if(tag){
+ up = tag->up;
+ debugtag(tag, "open");
+ for(c = 0; c < nelem(ontag); c++){
+ if(cistrcmp(tag->tag, ontag[c].tag) == 0){
+ ontag[c].open(text, tag);
+ break;
+ }
+ }
+ hidden = getattr(tag, "hidden") || gotstyle(tag, "display", "none");
+ } else {
+ up = nil;
+ hidden = 0;
+ }
if(tag == nil || tag->closing == 0){
while((c = Bgetc(&in)) > 0){
if(c == '<'){
- Tag t;
-
memset(&t, 0, sizeof(t));
if(parsetag(&t)){
if(t.opening){
t.up = tag;
- for(c = 0; c < nelem(ontag); c++){
- if(cistrcmp(t.tag, ontag[c].tag) == 0){
- ontag[c].open(text, &t);
- break;
- }
- }
- rtag = parsetext(text, &t);
- if(rtag == &t)
- rtag = tag;
- else
+ parsetext(text, &t);
+ if(t.up != tag){
+ debugtag(tag, "skip");
+ up = t.up;
break;
+ }
+ debugtag(tag, "back");
} else if(t.closing){
- while(rtag && cistrcmp(rtag->tag, t.tag))
- rtag = rtag->up;
- if(rtag == nil)
- rtag = tag;
- else
+ up = tag;
+ while(up && cistrcmp(up->tag, t.tag))
+ up = up->up;
+ if(up){
+ up = up->up;
break;
+ }
}
}
continue;
}
- if(!text->output)
+ if(hidden || !text->output)
continue;
r = substrune(parserune(c));
switch(r){
}
default:
if(text->space){
- text->space = 0;
if(text->pos >= 70)
emitrune(text, '\n');
else if(text->pos > 0)
if(r == 0xA0)
r = ' ';
emitrune(text, r);
+ text->space = 0;
}
}
}
- debugtag(tag, "close");
- if(tag && tag->close)
- tag->close(text, tag);
- return rtag;
+ if(tag){
+ debugtag(tag, "close");
+ if(tag->close){
+ tag->close(text, tag);
+ tag->close = nil;
+ }
+ if(up)
+ tag->up = up;
+ }
+}
+
+void
+inittext(Text *text)
+{
+ memset(text, 0, sizeof(Text));
+ text->fontstyle = "R";
+ text->fontsize = "NL";
+ text->output = 1;
}
void
main(void)
{
Text text;
-
Binit(&in, 0, OREAD);
- Binit(&out, 1, OWRITE);
-
- memset(&text, 0, sizeof(text));
-
- text.fontstyle = "R";
- text.fontsize = "NL";
- text.output = 1;
-
+ inittext(&text);
parsetext(&text, nil);
emit(&text, "\n");
+ write(1, text.bp, text.wp - text.bp);
}