]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/html2ms.c
kbdfs: simplfy
[plan9front.git] / sys / src / cmd / html2ms.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <bio.h>
5
/* stack depth and list kinds */
enum
{
	SSIZE		= 10,	/* dimension of fontstack and liststack */

	/* list types, stored in liststack[].type */
	Lordered	= 0,
	Lunordered	= 1,
	Lmenu		= 2,
	Ldir		= 3,
};
17
18 Biobuf in, out;
19 int lastc = '\n';
20 int inpre = 0;
21
22 /* stack for fonts */
23 char *fontstack[SSIZE];
24 char *font = "R";
25 int fsp;
26
27 /* stack for lists */
28 struct
29 {
30         int     type;
31         int     ord;
32 } liststack[SSIZE];
33 int lsp;
34
35 int quoting;
36
/*
 * One HTML tag: its name and the handlers run for the
 * opening (f) and closing (ef) forms of the tag.
 */
typedef struct Goobie Goobie;
struct Goobie
{
	char	*name;
	void	(*f)(Goobie *g, char *arg);	/* <name ...> */
	void	(*ef)(Goobie *g, char *arg);	/* </name> */
};
44
45 void    eatwhite(void);
46 void    escape(void);
47
48 typedef void Action(Goobie*, char*);
49
50 Action  g_ignore;
51 Action  g_unexpected;
52 Action  g_title;
53 Action  g_p;
54 Action  g_h;
55 Action  g_li;
56 Action  g_list, g_listend;
57 Action  g_pre;
58 Action  g_fpush, g_fpop;
59 Action  g_indent, g_exdent;
60 Action  g_dt;
61 Action  g_display;
62 Action  g_displayend;
63 Action  g_table, g_tableend, g_caption, g_captionend;
64 Action  g_br, g_hr;
65
66 Goobie gtab[] =
67 {
68         "!--",          g_ignore,       g_unexpected,
69         "!doctype",     g_ignore,       g_unexpected,
70         "a",            g_ignore,       g_ignore,
71         "address",      g_display,      g_displayend,
72         "b",            g_fpush,        g_fpop,
73         "base",         g_ignore,       g_unexpected,
74         "blink",        g_ignore,       g_ignore,
75         "blockquote",   g_ignore,       g_ignore,
76         "body",         g_ignore,       g_ignore,
77         "br",           g_br,           g_unexpected,
78         "caption",      g_caption,      g_captionend,
79         "center",       g_ignore,       g_ignore,
80         "cite",         g_ignore,       g_ignore,
81         "code",         g_ignore,       g_ignore,
82         "dd",           g_ignore,       g_unexpected,
83         "dfn",          g_ignore,       g_ignore,
84         "dir",          g_list,         g_listend,
85         "dl",           g_indent,       g_exdent,
86         "dt",           g_dt,           g_unexpected,
87         "em",           g_ignore,       g_ignore,
88         "font",         g_ignore,       g_ignore,
89         "form",         g_ignore,       g_ignore,
90         "h1",           g_h,            g_p,
91         "h2",           g_h,            g_p,
92         "h3",           g_h,            g_p,
93         "h4",           g_h,            g_p,
94         "h5",           g_h,            g_p,
95         "h6",           g_h,            g_p,
96         "head",         g_ignore,       g_ignore,
97         "hr",           g_hr,           g_unexpected,
98         "html",         g_ignore,       g_ignore,
99         "i",            g_fpush,        g_fpop,
100         "input",        g_ignore,       g_unexpected,
101         "img",          g_ignore,       g_unexpected,
102         "isindex",      g_ignore,       g_unexpected,
103         "kbd",          g_fpush,        g_fpop,
104         "key",          g_ignore,       g_ignore,
105         "li",           g_li,           g_unexpected,
106         "link",         g_ignore,       g_unexpected,
107         "listing",      g_ignore,       g_ignore,
108         "menu",         g_list,         g_listend,
109         "meta",         g_ignore,       g_unexpected,
110         "nextid",       g_ignore,       g_unexpected,
111         "ol",           g_list,         g_listend,
112         "option",       g_ignore,       g_unexpected,
113         "p",            g_p,            g_ignore,
114         "plaintext",    g_ignore,       g_unexpected,
115         "pre",          g_pre,          g_displayend,
116         "samp",         g_ignore,       g_ignore,
117         "select",       g_ignore,       g_ignore,
118         "strong",       g_ignore,       g_ignore,
119         "table",        g_table,        g_tableend,
120         "textarea",     g_ignore,       g_ignore,
121         "title",        g_title,        g_ignore,
122         "tt",           g_fpush,        g_fpop,
123         "u",            g_ignore,       g_ignore,
124         "ul",           g_list,         g_listend,
125         "var",          g_ignore,       g_ignore,
126         "xmp",          g_ignore,       g_ignore,
127         0,              0,      0,
128 };
129
130 typedef struct Entity Entity;
131 struct Entity
132 {
133         char *name;
134         Rune value;
135 };
136
137 Entity pl_entity[]=
138 {
139 "#SPACE", L' ', "#RS",   L'\n', "#RE",   L'\r', "quot",   L'"',
140 "AElig",  L'Æ', "Aacute", L'Á', "Acirc",  L'Â', "Agrave", L'À', "Aring",  L'Å',
141 "Atilde", L'Ã', "Auml",   L'Ä', "Ccedil", L'Ç', "ETH",    L'Ð', "Eacute", L'É',
142 "Ecirc",  L'Ê', "Egrave", L'È', "Euml",   L'Ë', "Iacute", L'Í', "Icirc",  L'Î',
143 "Igrave", L'Ì', "Iuml",   L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc",  L'Ô',
144 "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml",   L'Ö', "THORN",  L'Þ',
145 "Uacute", L'Ú', "Ucirc",  L'Û', "Ugrave", L'Ù', "Uuml",   L'Ü', "Yacute", L'Ý',
146 "aacute", L'á', "acirc",  L'â', "aelig",  L'æ', "agrave", L'à', "amp",    L'&',
147 "aring",  L'å', "atilde", L'ã', "auml",   L'ä', "ccedil", L'ç', "eacute", L'é',
148 "ecirc",  L'ê', "egrave", L'è', "eth",    L'ð', "euml",   L'ë', "gt",     L'>',
149 "iacute", L'í', "icirc",  L'î', "igrave", L'ì', "iuml",   L'ï', "lt",     L'<',
150 "ntilde", L'ñ', "oacute", L'ó', "ocirc",  L'ô', "ograve", L'ò', "oslash", L'ø',
151 "otilde", L'õ', "ouml",   L'ö', "szlig",  L'ß', "thorn",  L'þ', "uacute", L'ú',
152 "ucirc",  L'û', "ugrave", L'ù', "uuml",   L'ü', "yacute", L'ý', "yuml",   L'ÿ',
153 0
154 };
155
/*
 * Compare the strings a and b ignoring case.
 * Returns 0 when they are equal; otherwise the difference between
 * the first pair of lower-cased bytes that differ, with bytes
 * taken as unsigned values.
 */
int
cistrcmp(char *a, char *b)
{
	int ca, cb;

	for(;; a++, b++){
		/* plain char may be negative; the ctype functions want unsigned char values */
		ca = tolower((unsigned char)*a);
		cb = tolower((unsigned char)*b);
		if(ca != cb || ca == 0)
			return ca - cb;
	}
}
171
172 int
173 readupto(char *buf, int n, char d, char notme)
174 {
175         char *p;
176         int c;
177
178         buf[0] = 0;
179         for(p = buf;; p++){
180                 c = Bgetc(&in);
181                 if(c < 0){
182                         *p = 0;
183                         return -1;
184                 }
185                 if(c == notme){
186                         Bungetc(&in);
187                         return -1;
188                 }
189                 if(c == d){
190                         *p = 0;
191                         return 0;
192                 }
193                 *p = c;
194                 if(p == buf + n){
195                         *p = 0;
196                         Bprint(&out, "<%s", buf);
197                         return -1;
198                 }
199         }
200 }
201
202 void
203 dogoobie(void)
204 {
205         char *arg, *type;
206         Goobie *g;
207         char buf[1024];
208         int closing;
209
210         if(readupto(buf, sizeof(buf), '>', '<') < 0){
211                 Bprint(&out, "<%s", buf);
212                 return;
213         }
214         type = buf;
215         if(*type == '/'){
216                 type++;
217                 closing = 1;
218         } else
219                 closing = 0;
220         arg = strchr(type, ' ');
221         if(arg == 0)
222                 arg = strchr(type, '\r');
223         if(arg == 0)
224                 arg = strchr(type, '\n');
225         if(arg)
226                 *arg++ = 0;
227         for(g = gtab; g->name; g++)
228                 if(cistrcmp(type, g->name) == 0){
229                         if(closing){
230                                 if(g->ef){
231                                         (*g->ef)(g, arg);
232                                         return;
233                                 }
234                         } else {
235                                 if(g->f){
236                                         (*g->f)(g, arg);
237                                         return;
238                                 }
239                         }
240                 }
241         if(closing)
242                 type--;
243         if(arg)
244                 Bprint(&out, "<%s %s>\n", type, arg);
245         else
246                 Bprint(&out, "<%s>\n", type);
247 }
248
249 void
250 main(void)
251 {
252         int c, pos;
253
254         Binit(&in, 0, OREAD);
255         Binit(&out, 1, OWRITE);
256
257         pos = 0;
258         for(;;){
259                 c = Bgetc(&in);
260                 if(c < 0)
261                         return;
262                 switch(c){
263                 case '<':
264                         dogoobie();
265                         break;
266                 case '&':
267                         escape();
268                         break;
269                 case '\r':
270                         pos = 0;
271                         break;
272                 case '\n':
273                         if(quoting){
274                                 Bputc(&out, '"');
275                                 quoting = 0;
276                         }
277                         if(lastc != '\n')
278                                 Bputc(&out, '\n');
279                         /* can't emit leading spaces in filled troff docs */
280                         if (!inpre)
281                                 eatwhite();
282                         lastc = c;
283                         break;
284                 default:
285                         ++pos;
286                         if(!inpre && isascii(c) && isspace(c) && pos > 80){
287                                 Bputc(&out, '\n');
288                                 eatwhite();
289                                 pos = 0;
290                         }else
291                                 Bputc(&out, c);
292                         lastc = c;
293                         break;
294                 }
295         }
296 }
297
298 void
299 escape(void)
300 {
301         int c;
302         Entity *e;
303         char buf[8];
304
305         if(readupto(buf, sizeof(buf), ';', '\n') < 0){
306                 Bprint(&out, "&%s", buf);
307                 return;
308         }
309         for(e = pl_entity; e->name; e++)
310                 if(strcmp(buf, e->name) == 0){
311                         Bprint(&out, "%C", e->value);
312                         return;
313                 }
314         if(*buf == '#'){
315                 c = atoi(buf+1);
316                 if(isascii(c) && isprint(c)){
317                         Bputc(&out, c);
318                         return;
319                 }
320         }
321         Bprint(&out, "&%s;", buf);
322 }
323
324 /*
325  * whitespace is not significant to HTML, but newlines
326  * and leading spaces are significant to troff.
327  */
328 void
329 eatwhite(void)
330 {
331         int c;
332
333         for(;;){
334                 c = Bgetc(&in);
335                 if(c < 0)
336                         break;
337                 if(!isspace(c)){
338                         Bungetc(&in);
339                         break;
340                 }
341         }
342 }
343
344 /*
345  *  print at start of line
346  */
347 void
348 printsol(char *fmt, ...)
349 {
350         va_list arg;
351
352         if(quoting){
353                 Bputc(&out, '"');
354                 quoting = 0;
355         }
356         if(lastc != '\n')
357                 Bputc(&out, '\n');
358         va_start(arg, fmt);
359         Bvprint(&out, fmt, arg);
360         va_end(arg);
361         lastc = '\n';
362 }
363
364 void
365 g_ignore(Goobie *g, char *arg)
366 {
367         USED(g, arg);
368 }
369
370 void
371 g_unexpected(Goobie *g, char *arg)
372 {
373         USED(arg);
374         fprint(2, "unexpected %s ending\n", g->name);
375 }
376
377 void
378 g_title(Goobie *g, char *arg)
379 {
380         USED(arg);
381         printsol(".TL\n", g->name);
382 }
383
384 void
385 g_p(Goobie *g, char *arg)
386 {
387         USED(arg);
388         printsol(".LP\n", g->name);
389 }
390
391 void
392 g_h(Goobie *g, char *arg)
393 {
394         USED(arg);
395         printsol(".SH %c\n", g->name[1]);
396 }
397
398 void
399 g_list(Goobie *g, char *arg)
400 {
401         USED(arg);
402
403         if(lsp != SSIZE){
404                 switch(g->name[0]){
405                 case 'o':
406                         liststack[lsp].type  = Lordered;
407                         liststack[lsp].ord = 0;
408                         break;
409                 default:
410                         liststack[lsp].type = Lunordered;
411                         break;
412                 }
413         }
414         lsp++;
415 }
416
417 void
418 g_br(Goobie *g, char *arg)
419 {
420         USED(g, arg);
421         printsol(".br\n");
422 }
423
424 void
425 g_li(Goobie *g, char *arg)
426 {
427         USED(g, arg);
428         if(lsp <= 0 || lsp > SSIZE){
429                 printsol(".IP \\(bu\n");
430                 return;
431         }
432         switch(liststack[lsp-1].type){
433         case Lunordered:
434                 printsol(".IP \\(bu\n");
435                 break;
436         case Lordered:
437                 printsol(".IP %d\n", ++liststack[lsp-1].ord);
438                 break;
439         }
440 }
441
442 void
443 g_listend(Goobie *g, char *arg)
444 {
445         USED(g, arg);
446         if(--lsp < 0)
447                 lsp = 0;
448         printsol(".LP\n");
449 }
450
451 void
452 g_display(Goobie *g, char *arg)
453 {
454         USED(g, arg);
455         printsol(".DS\n");
456 }
457
458 void
459 g_pre(Goobie *g, char *arg)
460 {
461         USED(g, arg);
462         printsol(".DS L\n");
463         inpre = 1;
464 }
465
466 void
467 g_displayend(Goobie *g, char *arg)
468 {
469         USED(g, arg);
470         printsol(".DE\n");
471         inpre = 0;
472 }
473
474 void
475 g_fpush(Goobie *g, char *arg)
476 {
477         USED(arg);
478         if(fsp < SSIZE)
479                 fontstack[fsp] = font;
480         fsp++;
481         switch(g->name[0]){
482         case 'b':
483                 font = "B";
484                 break;
485         case 'i':
486                 font = "I";
487                 break;
488         case 'k':               /* kbd */
489         case 't':               /* tt */
490                 font = "(CW";
491                 break;
492         }
493         Bprint(&out, "\\f%s", font);
494 }
495
496 void
497 g_fpop(Goobie *g, char *arg)
498 {
499         USED(g, arg);
500         fsp--;
501         if(fsp < SSIZE)
502                 font = fontstack[fsp];
503         else
504                 font = "R";
505
506         Bprint(&out, "\\f%s", font);
507 }
508
509 void
510 g_indent(Goobie *g, char *arg)
511 {
512         USED(g, arg);
513         printsol(".RS\n");
514 }
515
516 void
517 g_exdent(Goobie *g, char *arg)
518 {
519         USED(g, arg);
520         printsol(".RE\n");
521 }
522
523 void
524 g_dt(Goobie *g, char *arg)
525 {
526         USED(g, arg);
527         printsol(".IP \"");
528         quoting = 1;
529 }
530
531 void
532 g_hr(Goobie *g, char *arg)
533 {
534         USED(g, arg);
535         printsol(".br\n");
536         printsol("\\l'5i'\n");
537 }
538
539
540 /*
541 <table border>
542 <caption><font size="+1"><b>Cumulative Class Data</b></font></caption>
543 <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th>
544 </tr>
545 <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th>
546 </tr>
547 <tr align=center>
548 <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
549 </tr>
550 <tr align=center>
551 <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
552 </tr>
553 <tr align=center>
554 <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
555 </tr>
556 <tr align=center>
557 <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
558 </tr>
559 <tr align=center>
560 <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
561 </tr>
562 <tr align=center>
563 <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
564 </tr>
565 <tr align=center>
566 <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
567 </tr>
568 <tr align=center>
569 <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
570 </tr>
571 <tr align=center>
572 <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
573 </tr>
574 </table>
575 */
576
577 void
578 g_table(Goobie *g, char *arg)
579 {
580         USED(g, arg);
581         printsol(".TS\ncenter ;\n");
582 }
583
584 void
585 g_tableend(Goobie *g, char *arg)
586 {
587         USED(g, arg);
588         printsol(".TE\n");
589 }
590
591 void
592 g_caption(Goobie *g, char *arg)
593 {
594         USED(g, arg);
595 }
596
597 void
598 g_captionend(Goobie *g, char *arg)
599 {
600         USED(g, arg);
601 }