#include <ctype.h>
#include <bio.h>
-enum
-{
- SSIZE = 10,
+typedef struct Tag Tag;
+typedef struct Attr Attr;
+typedef struct Text Text;
+
+struct Attr {
+ char attr[64];
+ char val[256-64];
+};
- /* list types */
- Lordered = 0,
- Lunordered,
- Lmenu,
- Ldir,
+struct Tag {
+ Tag *up;
+ char tag[32];
+ Attr attr[16];
+ int nattr;
+ int opening;
+ int closing;
+
+ void (*close)(Text *, Tag *);
+ union {
+ void *aux;
+ };
+};
+struct Text {
+ char* fontstyle;
+ char* fontsize;
+ int pre;
+ int pos;
+ int space;
+ int output;
+
+ char *bp;
+ char *wp;
+ int nb;
};
-Biobuf in, out;
-int lastc = '\n';
-int inpre = 0;
+void eatwhite(void);
+void parsetext(Text *, Tag *);
+int parsetag(Tag *);
+int parseattr(Attr *);
+void flushtext(Text *);
+char* getattr(Tag *, char *);
+int gotattr(Tag *, char *, char *);
+int gotstyle(Tag *, char *, char *);
+void reparent(Text *, Tag *, Tag *);
+void debugtag(Tag *, char *);
-/* stack for fonts */
-char *fontstack[SSIZE];
-char *font = "R";
-int fsp;
+Biobuf in;
+
+void
+emitbuf(Text *text, char *buf, int nbuf)
+{
+ int nw;
+
+ nw = text->wp - text->bp;
+ if((text->nb - nw) < nbuf){
+ if(nbuf < 4096)
+ text->nb = nw + 4096;
+ else
+ text->nb = nw + nbuf;
+ text->bp = realloc(text->bp, text->nb);
+ text->wp = text->bp + nw;
+ }
+ memmove(text->wp, buf, nbuf);
+ text->wp += nbuf;
+}
-/* stack for lists */
-struct
+void
+emitrune(Text *text, Rune r)
{
- int type;
- int ord;
-} liststack[SSIZE];
-int lsp;
+ char buf[UTFmax+1];
+
+ if(r == '\r' || r =='\n'){
+ text->pos = 0;
+ text->space = 0;
+ }else
+ text->pos++;
+ emitbuf(text, buf, runetochar(buf, &r));
+}
-int quoting;
+void
+emit(Text *text, char *fmt, ...)
+{
+ Rune buf[64];
+ va_list a;
+ int i;
+
+ if(fmt[0] == '.' && text->pos)
+ emitrune(text, '\n');
+ va_start(a, fmt);
+ runevsnprint(buf, nelem(buf), fmt, a);
+ va_end(a);
+ for(i=0; buf[i]; i++)
+ emitrune(text, buf[i]);
+}
-typedef struct Goobie Goobie;
-struct Goobie
+void
+restoreoutput(Text *text, Tag *)
{
- char *name;
- void (*f)(Goobie*, char*);
- void (*ef)(Goobie*, char*);
-};
+ text->output = 1;
+}
-void eatwhite(void);
-void escape(void);
-
-typedef void Action(Goobie*, char*);
-
-Action g_ignore;
-Action g_unexpected;
-Action g_title;
-Action g_p;
-Action g_h;
-Action g_li;
-Action g_list, g_listend;
-Action g_pre;
-Action g_fpush, g_fpop;
-Action g_indent, g_exdent;
-Action g_dt;
-Action g_display;
-Action g_displayend;
-Action g_table, g_tableend, g_caption, g_captionend;
-Action g_br, g_hr;
-
-Goobie gtab[] =
-{
- "!--", g_ignore, g_unexpected,
- "!doctype", g_ignore, g_unexpected,
- "a", g_ignore, g_ignore,
- "address", g_display, g_displayend,
- "b", g_fpush, g_fpop,
- "base", g_ignore, g_unexpected,
- "blink", g_ignore, g_ignore,
- "blockquote", g_ignore, g_ignore,
- "body", g_ignore, g_ignore,
- "br", g_br, g_unexpected,
- "caption", g_caption, g_captionend,
- "center", g_ignore, g_ignore,
- "cite", g_ignore, g_ignore,
- "code", g_ignore, g_ignore,
- "dd", g_ignore, g_unexpected,
- "dfn", g_ignore, g_ignore,
- "dir", g_list, g_listend,
- "dl", g_indent, g_exdent,
- "dt", g_dt, g_unexpected,
- "em", g_ignore, g_ignore,
- "font", g_ignore, g_ignore,
- "form", g_ignore, g_ignore,
- "h1", g_h, g_p,
- "h2", g_h, g_p,
- "h3", g_h, g_p,
- "h4", g_h, g_p,
- "h5", g_h, g_p,
- "h6", g_h, g_p,
- "head", g_ignore, g_ignore,
- "hr", g_hr, g_unexpected,
- "html", g_ignore, g_ignore,
- "i", g_fpush, g_fpop,
- "input", g_ignore, g_unexpected,
- "img", g_ignore, g_unexpected,
- "isindex", g_ignore, g_unexpected,
- "kbd", g_fpush, g_fpop,
- "key", g_ignore, g_ignore,
- "li", g_li, g_unexpected,
- "link", g_ignore, g_unexpected,
- "listing", g_ignore, g_ignore,
- "menu", g_list, g_listend,
- "meta", g_ignore, g_unexpected,
- "nextid", g_ignore, g_unexpected,
- "ol", g_list, g_listend,
- "option", g_ignore, g_unexpected,
- "p", g_p, g_ignore,
- "plaintext", g_ignore, g_unexpected,
- "pre", g_pre, g_displayend,
- "samp", g_ignore, g_ignore,
- "select", g_ignore, g_ignore,
- "strong", g_ignore, g_ignore,
- "table", g_table, g_tableend,
- "textarea", g_ignore, g_ignore,
- "title", g_title, g_ignore,
- "tt", g_fpush, g_fpop,
- "u", g_ignore, g_ignore,
- "ul", g_list, g_listend,
- "var", g_ignore, g_ignore,
- "xmp", g_ignore, g_ignore,
- 0, 0, 0,
-};
+void
+ongarbage(Text *text, Tag *tag)
+{
+ if(text->output == 0)
+ return;
+ tag->close = restoreoutput;
+ text->output = 0;
+}
-typedef struct Entity Entity;
-struct Entity
+void
+onmeta(Text *, Tag *tag)
{
- char *name;
- Rune value;
-};
+ tag->closing = 1;
+}
-Entity pl_entity[]=
-{
-"#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"',
-"AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å',
-"Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É',
-"Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î',
-"Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô',
-"Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ',
-"Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý',
-"aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&',
-"aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é',
-"ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>',
-"iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<',
-"ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø',
-"otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú',
-"ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ',
-0
-};
+void
+onp(Text *text, Tag *)
+{
+ emit(text, ".LP\n");
+}
-int
-cistrcmp(char *a, char *b)
+void
+restorepre(Text *text, Tag *)
{
- int c, d;
+ text->pre = 0;
+ emit(text, ".DE\n");
+}
- for(;; a++, b++){
- d = tolower(*a);
- c = d - tolower(*b);
- if(c)
- break;
- if(d == 0)
- break;
- }
- return c;
+void
+onpre(Text *text, Tag *tag)
+{
+ if(text->pre)
+ return;
+ tag->close = restorepre;
+ text->pre = 1;
+ emit(text, ".DS L\n");
}
-int
-readupto(char *buf, int n, char d, char notme)
+void
+onli(Text *text, Tag *tag)
{
- char *p;
- int c;
+ if(tag->up && cistrcmp(tag->up->tag, "ol") == 0)
+ emit(text, ".IP\n");
+ else
+ emit(text, ".IP \\(bu\n");
+ if(tag->up)
+ tag->up->close = onp;
+}
- buf[0] = 0;
- for(p = buf;; p++){
- c = Bgetc(&in);
- if(c < 0){
- *p = 0;
- return -1;
- }
- if(c == notme){
- Bungetc(&in);
- return -1;
- }
- if(c == d){
- *p = 0;
- return 0;
- }
- *p = c;
- if(p == buf + n){
- *p = 0;
- Bprint(&out, "<%s", buf);
- return -1;
- }
- }
+void
+onh(Text *text, Tag *tag)
+{
+ emit(text, ".SH\n");
+ tag->close = onp;
}
void
-dogoobie(void)
+onbr(Text *text, Tag *tag)
{
- char *arg, *type;
- Goobie *g;
- char buf[1024];
- int closing;
+ tag->closing = 1;
+ emit(text, ".br\n");
+ if(cistrcmp(tag->tag, "hr") == 0)
+ emit(text, "\\l'5i'\n.br\n");
+}
- if(readupto(buf, sizeof(buf), '>', '<') < 0){
- Bprint(&out, "<%s", buf);
+void
+fontstyle(Text *text, char *style)
+{
+ if(strcmp(text->fontstyle, style) == 0)
return;
- }
- type = buf;
- if(*type == '/'){
- type++;
- closing = 1;
- } else
- closing = 0;
- arg = strchr(type, ' ');
- if(arg == 0)
- arg = strchr(type, '\r');
- if(arg == 0)
- arg = strchr(type, '\n');
- if(arg)
- *arg++ = 0;
- for(g = gtab; g->name; g++)
- if(cistrcmp(type, g->name) == 0){
- if(closing){
- if(g->ef){
- (*g->ef)(g, arg);
- return;
- }
- } else {
- if(g->f){
- (*g->f)(g, arg);
- return;
- }
- }
- }
- if(closing)
- type--;
- if(arg)
- Bprint(&out, "<%s %s>\n", type, arg);
- else
- Bprint(&out, "<%s>\n", type);
+ text->fontstyle = style;
+ emit(text, "\\f%s", style);
}
void
-main(void)
+fontsize(Text *text, char *size)
{
- int c, pos;
+ if(strcmp(text->fontsize, size) == 0)
+ return;
+ text->fontsize = size;
+ emit(text, ".%s\n", size);
+}
- Binit(&in, 0, OREAD);
- Binit(&out, 1, OWRITE);
+void
+restorefontstyle(Text *text, Tag *tag)
+{
+ fontstyle(text, tag->aux);
+}
- pos = 0;
- for(;;){
- c = Bgetc(&in);
- if(c < 0)
- return;
- switch(c){
- case '<':
- dogoobie();
- break;
- case '&':
- escape();
- break;
- case '\r':
- pos = 0;
- break;
- case '\n':
- if(quoting){
- Bputc(&out, '"');
- quoting = 0;
- }
- if(lastc != '\n')
- Bputc(&out, '\n');
- /* can't emit leading spaces in filled troff docs */
- if (!inpre)
- eatwhite();
- lastc = c;
- break;
- default:
- ++pos;
- if(!inpre && isascii(c) && isspace(c) && pos > 80){
- Bputc(&out, '\n');
- eatwhite();
- pos = 0;
- }else
- Bputc(&out, c);
- lastc = c;
- break;
- }
- }
+void
+restorefontsize(Text *text, Tag *tag)
+{
+ fontsize(text, tag->aux);
}
void
-escape(void)
+oni(Text *text, Tag *tag)
{
- int c;
- Entity *e;
- char buf[8];
+ tag->aux = text->fontstyle;
+ tag->close = restorefontstyle;
+ fontstyle(text, "I");
+}
- if(readupto(buf, sizeof(buf), ';', '\n') < 0){
- Bprint(&out, "&%s", buf);
- return;
- }
- for(e = pl_entity; e->name; e++)
- if(strcmp(buf, e->name) == 0){
- Bprint(&out, "%C", e->value);
- return;
- }
- if(*buf == '#'){
- c = atoi(buf+1);
- if(isascii(c) && isprint(c)){
- Bputc(&out, c);
- return;
- }
- }
- Bprint(&out, "&%s;", buf);
+void
+onb(Text *text, Tag *tag)
+{
+ tag->aux = text->fontstyle;
+ tag->close = restorefontstyle;
+ fontstyle(text, "B");
}
-/*
- * whitespace is not significant to HTML, but newlines
- * and leading spaces are significant to troff.
- */
+void onsmall(Text *text, Tag *tag);
+void onsup(Text *text, Tag *tag);
+
void
-eatwhite(void)
+onsub(Text *text, Tag *tag)
{
- int c;
+ emit(text, "\\v\'0.5\'");
+ if(cistrcmp(tag->tag, "sub") == 0){
+ emit(text, "\\x\'0.5\'");
+ onsmall(text, tag);
+ } else
+ restorefontsize(text, tag);
+ tag->close = onsup;
+}
- for(;;){
- c = Bgetc(&in);
- if(c < 0)
- break;
- if(!isspace(c)){
- Bungetc(&in);
- break;
- }
- }
+void
+onsup(Text *text, Tag *tag)
+{
+ emit(text, "\\v\'-0.5\'");
+ if(cistrcmp(tag->tag, "sup") == 0){
+ emit(text, "\\x\'-0.5\'");
+ onsmall(text, tag);
+ }else
+ restorefontsize(text, tag);
+ tag->close = onsub;
}
/*
- * print at start of line
+ * this is poor mans CSS handler.
*/
void
-printsol(char *fmt, ...)
+onspan(Text *text, Tag *tag)
{
- va_list arg;
+ Attr *a;
- if(quoting){
- Bputc(&out, '"');
- quoting = 0;
+ if(!tag->opening)
+ return;
+
+ for(a=tag->attr; a < tag->attr+tag->nattr; a++){
+ if(cistrcmp(a->attr, "class") != 0)
+ continue;
+
+ if(cistrcmp(a->val, "bold") == 0){
+ onb(text, tag);
+ return;
+ }
+ if(cistrcmp(a->val, "italic") == 0){
+ oni(text, tag);
+ return;
+ }
+ if(cistrcmp(a->val, "subscript") == 0){
+ strcpy(tag->tag, "sub");
+ onsub(text, tag);
+ strcpy(tag->tag, "span");
+ return;
+ }
+ if(cistrcmp(a->val, "superscript") == 0){
+ strcpy(tag->tag, "sup");
+ onsup(text, tag);
+ strcpy(tag->tag, "span");
+ return;
+ }
}
- if(lastc != '\n')
- Bputc(&out, '\n');
- va_start(arg, fmt);
- Bvprint(&out, fmt, arg);
- va_end(arg);
- lastc = '\n';
}
void
-g_ignore(Goobie *g, char *arg)
+ontt(Text *text, Tag *tag)
{
- USED(g, arg);
+ tag->aux = text->fontstyle;
+ tag->close = restorefontstyle;
+ fontstyle(text, "C");
}
void
-g_unexpected(Goobie *g, char *arg)
+onsmall(Text *text, Tag *tag)
{
- USED(arg);
- fprint(2, "unexpected %s ending\n", g->name);
+ tag->aux = text->fontsize;
+ tag->close = restorefontsize;
+ fontsize(text, "SM");
}
void
-g_title(Goobie *g, char *arg)
+onbig(Text *text, Tag *tag)
{
- USED(arg);
- printsol(".TL\n", g->name);
+ tag->aux = text->fontsize;
+ tag->close = restorefontsize;
+ fontsize(text, "LG");
}
void
-g_p(Goobie *g, char *arg)
+endquote(Text *text, Tag *tag)
{
- USED(arg);
- printsol(".LP\n", g->name);
+ if(cistrcmp(tag->tag, "q") == 0)
+ emitrune(text, '"');
+ emit(text, ".QE\n");
}
void
-g_h(Goobie *g, char *arg)
+onquote(Text *text, Tag *tag)
{
- USED(arg);
- printsol(".SH %c\n", g->name[1]);
+ tag->close = endquote;
+ if(cistrcmp(tag->tag, "q") == 0)
+ emit(text, ".QS\n\"");
+ else
+ emit(text, ".QP\n");
}
-void
-g_list(Goobie *g, char *arg)
+typedef struct Table Table;
+struct Table
{
- USED(arg);
+ char *bp;
+ int nb;
- if(lsp != SSIZE){
- switch(g->name[0]){
- case 'o':
- liststack[lsp].type = Lordered;
- liststack[lsp].ord = 0;
- break;
- default:
- liststack[lsp].type = Lunordered;
- break;
- }
- }
- lsp++;
+ Table *next;
+ Table *prev;
+ int enclose;
+ int brk;
+
+ char fmt[4];
+
+ Text save;
+};
+
+Tag*
+tabletag(Tag *tag)
+{
+ if(tag == nil)
+ return nil;
+ if(cistrcmp(tag->tag, "table") == 0)
+ return tag;
+ return tabletag(tag->up);
}
void
-g_br(Goobie *g, char *arg)
-{
- USED(g, arg);
- printsol(".br\n");
+dumprows(Text *text, Table *s, Table *e)
+{
+
+ for(; s != e; s = s->next){
+ if(s->enclose)
+ emit(text, "T{\n");
+ if(s->nb <= 0)
+ emit(text, "\\ ");
+ else
+ emitbuf(text, s->bp, s->nb);
+ if(s->enclose)
+ emit(text, "\nT}");
+ emitrune(text, s->brk ? '\n' : '\t');
+ }
}
void
-g_li(Goobie *g, char *arg)
-{
- USED(g, arg);
- if(lsp <= 0 || lsp > SSIZE){
- printsol(".IP \\(bu\n");
+endtable(Text *text, Tag *tag)
+{
+ int i, cols, rows;
+ Table *t, *h, *s;
+ Tag *tt;
+
+ /* reverse list */
+ h = nil;
+ t = tag->aux;
+ for(; t; t = t->prev){
+ t->next = h;
+ h = t;
+ }
+
+ /*
+ * nested table case, add our cells to the next table up.
+ * this is the best we can do, tbl doesnt support nesting
+ */
+ if(tt = tabletag(tag->up)){
+ while(t = h){
+ h = h->next;
+ t->next = nil;
+ t->prev = tt->aux;
+ tt->aux = t;
+ }
return;
}
- switch(liststack[lsp-1].type){
- case Lunordered:
- printsol(".IP \\(bu\n");
- break;
- case Lordered:
- printsol(".IP %d\n", ++liststack[lsp-1].ord);
- break;
+
+ cols = 0;
+ rows = 0;
+ for(i = 0, t = h; t; t = t->next){
+ i++;
+ if(t->brk){
+ rows++;
+ if(i > cols)
+ cols = i;
+ i = 0;
+ }
+ }
+
+ i = 0;
+ for(t = h; t; t = t->next){
+ i++;
+ if(t->brk){
+ while(i < cols){
+ s = mallocz(sizeof(Table), 1);
+ strcpy(s->fmt, "L");
+ s->brk = t->brk;
+ t->brk = 0;
+ s->next = t->next;
+ t->next = s;
+ i++;
+ }
+ break;
+ }
+ }
+
+ s = h;
+ while(s){
+ emit(text, ".TS\n");
+ if(gotattr(tag, "align", "center"))
+ emit(text, "center ;\n");
+ i = 0;
+ for(t = s; t; t = t->next){
+ emit(text, "%s", t->fmt);
+ if(t->brk){
+ emitrune(text, '\n');
+ if(++i > 30){
+ t = t->next;
+ break;
+ }
+ }else
+ emitrune(text, ' ');
+ }
+ emit(text, ".\n");
+ dumprows(text, s, t);
+ emit(text, ".TE\n");
+ s = t;
+ }
+
+ while(t = h){
+ h = t->next;
+ free(t->bp);
+ free(t);
}
}
void
-g_listend(Goobie *g, char *arg)
+ontable(Text *, Tag *tag)
{
- USED(g, arg);
- if(--lsp < 0)
- lsp = 0;
- printsol(".LP\n");
+ tag->aux = nil;
+ tag->close = endtable;
}
void
-g_display(Goobie *g, char *arg)
+endcell(Text *text, Tag *tag)
{
- USED(g, arg);
- printsol(".DS\n");
+ Table *t;
+ Tag *tt;
+ int i;
+
+ if((tt = tabletag(tag)) == nil)
+ return;
+ if(cistrcmp(tag->tag, "tr") == 0){
+ if(t = tt->aux)
+ t->brk = 1;
+ } else {
+ t = tag->aux;
+ t->bp = text->bp;
+ t->nb = text->wp - text->bp;
+
+ for(i=0; i<t->nb; i++)
+ if(strchr(" \t\r\n", t->bp[i]) == nil)
+ break;
+ if(i > 0){
+ memmove(t->bp, t->bp+i, t->nb - i);
+ t->nb -= i;
+ }
+ while(t->nb > 0 && strchr(" \t\r\n", t->bp[t->nb-1]))
+ t->nb--;
+ if(t->nb < 32){
+ for(i=0; i<t->nb; i++)
+ if(strchr("\t\r\n", t->bp[i]))
+ break;
+ t->enclose = i < t->nb;
+ } else {
+ t->enclose = 1;
+ }
+ if(gotstyle(tag, "text-align", "center") || gotstyle(tt, "text-align", "center"))
+ strcpy(t->fmt, "C");
+ else
+ strcpy(t->fmt, "L");
+ if(strcmp(tag->tag, "th") == 0)
+ strcpy(t->fmt+1, "B");
+ t->prev = tt->aux;
+ tt->aux = t;
+ *text = t->save;
+ }
}
void
-g_pre(Goobie *g, char *arg)
+oncell(Text *text, Tag *tag)
{
- USED(g, arg);
- printsol(".DS L\n");
- inpre = 1;
+ Tag *tt;
+
+ if((tt = tabletag(tag)) == nil)
+ return;
+ if(cistrcmp(tag->tag, "tr")){
+ Table *t;
+
+ tt = tag->up;
+ while(tt && cistrcmp(tt->tag, "tr"))
+ tt = tt->up;
+ if(tt == nil)
+ return;
+ reparent(text, tag, tt);
+
+ t = mallocz(sizeof(*t), 1);
+ t->save = *text;
+ tag->aux = t;
+
+ text->bp = nil;
+ text->wp = nil;
+ text->nb = 0;
+ text->pos = 0;
+ text->space = 0;
+ } else
+ reparent(text, tag, tt);
+ tag->close = endcell;
}
+struct {
+ char *tag;
+ void (*open)(Text *, Tag *);
+} ontag[] = {
+ "b", onb,
+ "big", onbig,
+ "blockquote", onquote,
+ "br", onbr,
+ "cite", oni,
+ "code", ontt,
+ "dfn", oni,
+ "em", oni,
+ "h1", onh,
+ "h2", onh,
+ "h3", onh,
+ "h4", onh,
+ "h5", onh,
+ "h6", onh,
+ "head", ongarbage,
+ "hr", onbr,
+ "i", oni,
+ "img", onmeta,
+ "kbd", ontt,
+ "li", onli,
+ "link", onmeta,
+ "meta", onmeta,
+ "p", onp,
+ "pre", onpre,
+ "q", onquote,
+ "samp", ontt,
+ "script", ongarbage,
+ "small", onsmall,
+ "strong", onb,
+ "style", ongarbage,
+ "table", ontable,
+ "td", oncell,
+ "th", oncell,
+ "tr", oncell,
+ "sub", onsub,
+ "sup", onsup,
+ "span", onspan,
+ "tt", ontt,
+ "var", oni,
+};
+
void
-g_displayend(Goobie *g, char *arg)
+eatwhite(void)
{
- USED(g, arg);
- printsol(".DE\n");
- inpre = 0;
+ int c;
+
+ while((c = Bgetc(&in)) > 0){
+ if(strchr("\n\r\t ", c) == nil){
+ Bungetc(&in);
+ return;
+ }
+ }
}
void
-g_fpush(Goobie *g, char *arg)
+parsecomment(void)
{
- USED(arg);
- if(fsp < SSIZE)
- fontstack[fsp] = font;
- fsp++;
- switch(g->name[0]){
- case 'b':
- font = "B";
- break;
- case 'i':
- font = "I";
- break;
- case 'k': /* kbd */
- case 't': /* tt */
- font = "(CW";
- break;
+ char buf[64];
+ int n, c;
+
+ n = 0;
+ eatwhite();
+ while((c = Bgetc(&in)) > 0){
+ if(c == '>')
+ return;
+ if(n == 0 && c == '-'){
+ while((c = Bgetc(&in)) > 0){
+ if(c == '-')
+ if(Bgetc(&in) == '-')
+ if(Bgetc(&in) == '>')
+ return;
+ }
+ }
+ if(n+1 < sizeof(buf)){
+ buf[n++] = c;
+ if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
+ continue;
+ while((c = Bgetc(&in)) > 0){
+ if(c == ']'){
+ if(Bgetc(&in) == ']'){
+ if(Bgetc(&in) != '>')
+ Bungetc(&in);
+ return;
+ }
+ }
+ }
+ }
}
- Bprint(&out, "\\f%s", font);
}
-void
-g_fpop(Goobie *g, char *arg)
+int
+parseattr(Attr *a)
{
- USED(g, arg);
- fsp--;
- if(fsp < SSIZE)
- font = fontstack[fsp];
- else
- font = "R";
+ int q, c, n;
- Bprint(&out, "\\f%s", font);
+ n = 0;
+ eatwhite();
+ while((c = Bgetc(&in)) > 0){
+ if(strchr("</>=?!", c)){
+ Bungetc(&in);
+ break;
+ }
+ if(strchr("\n\r\t ", c))
+ break;
+ if(n < sizeof(a->attr)-1)
+ a->attr[n++] = c;
+ }
+ if(n == 0)
+ return 0;
+ a->attr[n] = 0;
+ n = 0;
+ eatwhite();
+ if(Bgetc(&in) == '='){
+ eatwhite();
+ c = Bgetc(&in);
+ if(strchr("'\"", c)){
+ q = c;
+ while((c = Bgetc(&in)) > 0){
+ if(c == q)
+ break;
+ if(n < sizeof(a->val)-1)
+ a->val[n++] = c;
+ }
+ } else {
+ Bungetc(&in);
+ while((c = Bgetc(&in)) > 0){
+ if(strchr("\n\r\t </>?!", c)){
+ Bungetc(&in);
+ break;
+ }
+ if(n < sizeof(a->val)-1)
+ a->val[n++] = c;
+ }
+ }
+ } else
+ Bungetc(&in);
+ a->val[n] = 0;
+ return 1;
}
-void
-g_indent(Goobie *g, char *arg)
+int
+parsetag(Tag *t)
{
- USED(g, arg);
- printsol(".RS\n");
+ int n, c;
+
+ t->nattr = 0;
+ t->opening = 1;
+ t->closing = 0;
+
+ n = 0;
+ eatwhite();
+ while((c = Bgetc(&in)) > 0){
+ if(c == '>')
+ break;
+ if(strchr("\n\r\t ", c)){
+ if(parseattr(t->attr + t->nattr))
+ if(t->nattr < nelem(t->attr)-1)
+ t->nattr++;
+ continue;
+ }
+ if(n == 0 && strchr("?!", c)){
+ parsecomment();
+ return 0;
+ }
+ if(c == '/'){
+ if(n == 0){
+ t->opening = 0;
+ t->closing = 1;
+ } else
+ t->closing = 1;
+ continue;
+ }
+ if(n < sizeof(t->tag)-1)
+ t->tag[n++] = c;
+ }
+ t->tag[n] = 0;
+ return n > 0;
}
-void
-g_exdent(Goobie *g, char *arg)
-{
- USED(g, arg);
- printsol(".RE\n");
+Rune
+parserune(int c)
+{
+ char buf[10];
+ int n;
+ Rune r;
+
+ n = 0;
+ if(c == '&'){
+ while((c = Bgetc(&in)) > 0){
+ if(strchr(";&</>\n\r\t ", c)){
+ if(c != ';')
+ Bungetc(&in);
+ if(n == 0)
+ return '&';
+ break;
+ }
+ if(n == sizeof(buf)-1)
+ break;
+ buf[n++] = c;
+ }
+ buf[n] = 0;
+ if(strcmp(buf, "lt") == 0)
+ return '<';
+ if(strcmp(buf, "gt") == 0)
+ return '>';
+ if(strcmp(buf, "quot") == 0)
+ return '"';
+ if(strcmp(buf, "apos") == 0)
+ return '\'';
+ if(strcmp(buf, "amp") == 0)
+ return '&';
+ /* use tcs -f html to handle the rest. */
+ } else {
+ do {
+ buf[n++] = c;
+ if(fullrune(buf, n)){
+ chartorune(&r, buf);
+ return r;
+ }
+ if(n >= UTFmax)
+ break;
+ } while((c = Bgetc(&in)) > 0);
+ }
+ return 0xFFFD;
+}
+
+Rune
+substrune(Rune r)
+{
+ switch(r){
+ case 0x2019:
+ case 0x2018:
+ return '\'';
+ case 0x201c:
+ case 0x201d:
+ return '"';
+ default:
+ return r;
+ }
}
void
-g_dt(Goobie *g, char *arg)
+debugtag(Tag *tag, char *dbg)
{
- USED(g, arg);
- printsol(".IP \"");
- quoting = 1;
+ if(1){
+ USED(tag);
+ USED(dbg);
+ return;
+ }
+
+ if(tag == nil)
+ return;
+ debugtag(tag->up, nil);
+ fprint(2, "%s %s%s", tag->tag, dbg ? dbg : " > ", dbg ? "\n" : "");
}
-void
-g_hr(Goobie *g, char *arg)
+char*
+getattr(Tag *tag, char *attr)
{
- USED(g, arg);
- printsol(".br\n");
- printsol("\\l'5i'\n");
+ int i;
+
+ for(i=0; i<tag->nattr; i++)
+ if(cistrcmp(tag->attr[i].attr, attr) == 0)
+ return tag->attr[i].val;
+ return nil;
}
+int
+gotattr(Tag *tag, char *attr, char *val)
+{
+ char *v;
-/*
-<table border>
-<caption><font size="+1"><b>Cumulative Class Data</b></font></caption>
-<tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th>
-</tr>
-<tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th>
-</tr>
-<tr align=center>
-<td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-<tr align=center>
-<td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
-</tr>
-</table>
-*/
+ if((v = getattr(tag, attr)) == nil)
+ return 0;
+ return cistrstr(v, val) != 0;
+}
+
+int
+gotstyle(Tag *tag, char *style, char *val)
+{
+ char *v;
+
+ if((v = getattr(tag, "style")) == nil)
+ return 0;
+ if((v = cistrstr(v, style)) == nil)
+ return 0;
+ v += strlen(style);
+ while(*v && *v != ':')
+ v++;
+ if(*v != ':')
+ return 0;
+ v++;
+ while(*v && strchr("\t ", *v))
+ v++;
+ if(cistrncmp(v, val, strlen(val)))
+ return 0;
+ return 1;
+}
void
-g_table(Goobie *g, char *arg)
+reparent(Text *text, Tag *tag, Tag *up)
{
- USED(g, arg);
- printsol(".TS\ncenter ;\n");
+ Tag *old;
+
+ old = tag->up;
+ while(old != up){
+ debugtag(old, "reparent");
+ if(old->close){
+ old->close(text, old);
+ old->close = nil;
+ }
+ old = old->up;
+ }
+ tag->up = up;
}
+
void
-g_tableend(Goobie *g, char *arg)
-{
- USED(g, arg);
- printsol(".TE\n");
+parsetext(Text *text, Tag *tag)
+{
+ int hidden, c;
+ Tag t, *up;
+ Rune r;
+
+ if(tag){
+ up = tag->up;
+ debugtag(tag, "open");
+ for(c = 0; c < nelem(ontag); c++){
+ if(cistrcmp(tag->tag, ontag[c].tag) == 0){
+ ontag[c].open(text, tag);
+ break;
+ }
+ }
+ hidden = getattr(tag, "hidden") || gotstyle(tag, "display", "none");
+ } else {
+ up = nil;
+ hidden = 0;
+ }
+ if(tag == nil || tag->closing == 0){
+ while((c = Bgetc(&in)) > 0){
+ if(c == '<'){
+ memset(&t, 0, sizeof(t));
+ if(parsetag(&t)){
+ if(t.opening){
+ t.up = tag;
+ parsetext(text, &t);
+ if(t.up != tag){
+ debugtag(tag, "skip");
+ up = t.up;
+ break;
+ }
+ debugtag(tag, "back");
+ } else if(t.closing){
+ up = tag;
+ while(up && cistrcmp(up->tag, t.tag))
+ up = up->up;
+ if(up){
+ up = up->up;
+ break;
+ }
+ }
+ }
+ continue;
+ }
+ if(hidden || !text->output)
+ continue;
+ r = substrune(parserune(c));
+ switch(r){
+ case '\n':
+ case '\r':
+ case ' ':
+ case '\t':
+ if(text->pre == 0){
+ text->space = 1;
+ break;
+ }
+ default:
+ if(text->space){
+ if(text->pos >= 70)
+ emitrune(text, '\n');
+ else if(text->pos > 0)
+ emitrune(text, ' ');
+ }
+ if((text->pos == 0 && r == '.') || r == '\\')
+ emit(text, "\\&");
+ if(r == '\\' || r == 0xA0)
+ emitrune(text, '\\');
+ if(r == 0xA0)
+ r = ' ';
+ emitrune(text, r);
+ text->space = 0;
+ }
+ }
+ }
+ if(tag){
+ debugtag(tag, "close");
+ if(tag->close){
+ tag->close(text, tag);
+ tag->close = nil;
+ }
+ if(up)
+ tag->up = up;
+ }
}
void
-g_caption(Goobie *g, char *arg)
+inittext(Text *text)
{
- USED(g, arg);
+ memset(text, 0, sizeof(Text));
+ text->fontstyle = "R";
+ text->fontsize = "NL";
+ text->output = 1;
}
void
-g_captionend(Goobie *g, char *arg)
+main(void)
{
- USED(g, arg);
+ Text text;
+ Binit(&in, 0, OREAD);
+ inittext(&text);
+ parsetext(&text, nil);
+ emit(&text, "\n");
+ write(1, text.bp, text.wp - text.bp);
}