]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/mothra/rdhtml.c
5420b5c502f53c165ca2ce6ffe7fe32cafdad4db
[plan9front.git] / sys / src / cmd / mothra / rdhtml.c
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <event.h>
5 #include <panel.h>
6 #include "mothra.h"
7 #include "html.h"
8 #include "rtext.h"
9
10 typedef struct Fontdata Fontdata;
11 struct Fontdata{
12         char *name;
13         Font *font;
14         int space;
15 }fontlist[4][4]={
16         "dejavusans/unicode.12", 0, 0,
17         "dejavusans/unicode.12", 0, 0,
18         "dejavusans/unicode.14", 0, 0,
19         "dejavusans/unicode.16", 0, 0,
20
21         "dejavusansit/unicode.12", 0, 0,
22         "dejavusansit/unicode.12", 0, 0,
23         "dejavusansit/unicode.14", 0, 0,
24         "dejavusansit/unicode.16", 0, 0,
25
26         "dejavusansbd/unicode.12", 0, 0,
27         "dejavusansbd/unicode.12", 0, 0,
28         "dejavusansbd/unicode.14", 0, 0,
29         "dejavusansbd/unicode.16", 0, 0,
30
31         "vga/vga", 0, 0,
32         "terminus/unicode.14", 0, 0,
33         "terminus/unicode.16", 0, 0,
34         "terminus/unicode.18", 0, 0,
35 };
36 Fontdata *pl_whichfont(int f, int s){
37         char name[NNAME];
38
39         assert(f >= 0 && f < 4);
40         assert(s >= 0 && s < 4);
41
42         if(fontlist[f][s].font==0){
43                 snprint(name, sizeof(name), "/lib/font/bit/%s.font", fontlist[f][s].name);
44                 fontlist[f][s].font=openfont(display, name);
45                 if(fontlist[f][s].font==0) fontlist[f][s].font=font;
46                 fontlist[f][s].space=stringwidth(fontlist[f][s].font, "0");
47         }
48         return &fontlist[f][s];
49         
50 }
/* Load all sixteen entries of the font table, row by row. */
void getfonts(void){
	int i;

	for(i=0;i<4*4;i++)
		pl_whichfont(i/4, i%4);
}
57 void pl_pushstate(Hglob *g, int t){
58         ++g->state;
59         if(g->state==&g->stack[NSTACK]){
60                 htmlerror(g->name, g->lineno, "stack overflow at <%s>", tag[t].name);
61                 --g->state;
62         }
63         g->state[0]=g->state[-1];
64         g->state->tag=t;
65 }
66 void pl_linespace(Hglob *g){
67         plrtbitmap(&g->dst->text, 1000000, 0, linespace, 0, 0);
68         g->para=0;
69         g->linebrk=0;
70 }
/* direction argument of strtolength */
enum{
	HORIZ=0,	/* length is measured along x */
	VERT=1,		/* length is measured along y */
};
75 int strtolength(Hglob *g, int dir, char *str)
76 {
77         double f;
78
79         f = atof(str);
80         if(cistrstr(str, "px"))
81                 return floor(f);
82         if(cistrstr(str, "%"))
83                 return floor(f*((dir==HORIZ) ? Dx(g->dst->text->r) : Dy(g->dst->text->r))/100);
84         if(cistrstr(str, "em")){
85                 Point z;
86                 z = stringsize(g->dst->text->font, "M");
87                 return floor(f*((dir==HORIZ) ? z.x : z.y));
88         }
89         return floor(f);
90 }
91
92 void pl_htmloutput(Hglob *g, int nsp, char *s, Field *field){
93         Fontdata *f;
94         int space, indent;
95         Action *ap;
96         if(g->state->tag==Tag_title
97 /*      || g->state->tag==Tag_textarea */
98         || g->state->tag==Tag_select){
99                 if(s){
100                         if(g->tp!=g->text && g->tp!=g->etext && g->tp[-1]!=' ')
101                                 *g->tp++=' ';
102                         while(g->tp!=g->etext && *s) *g->tp++=*s++;
103                         if(g->state->tag==Tag_title) update(g->dst);
104                         *g->tp='\0';
105                 }
106                 return;
107         }
108         f=pl_whichfont(g->state->font, g->state->size);
109         space=f->space;
110         indent=g->state->margin;
111         if(g->para){
112                 space=1000000;
113                 indent+=g->state->indent;
114         }
115         else if(g->linebrk)
116                 space=1000000;
117         else if(nsp<=0)
118                 space=0;
119         if(g->state->image[0]==0 && g->state->link[0]==0 && g->state->name[0]==0 && field==0)
120                 ap=0;
121         else{
122                 ap=mallocz(sizeof(Action), 1);
123                 if(ap!=0){
124                         if(g->state->image[0])
125                                 ap->image = strdup(g->state->image);
126                         if(g->state->link[0])
127                                 ap->link = strdup(g->state->link);
128                         if(g->state->name[0])
129                                 ap->name = strdup(g->state->name);
130                         ap->ismap=g->state->ismap;
131                         ap->width=g->state->width;
132                         ap->height=g->state->height;
133                         ap->field=field;
134                 }
135         }
136         if(space<0) space=0;
137         if(indent<0) indent=0;
138         if(g->state->pre && s[0]=='\t'){
139                 space=0;
140                 while(s[0]=='\t'){
141                         space++;
142                         s++;
143                 }
144                 space=PL_TAB|space;
145                 if(g->linebrk){
146                         indent=space;
147                         space=1000000;
148                 }
149         }
150         plrtstr(&g->dst->text, space, indent, f->font, strdup(s),
151                 g->state->link[0] || g->state->image[0], ap);
152         g->para=0;
153         g->linebrk=0;
154         update(g->dst);
155 }
156
157 /*
158  * Buffered read, no translation
159  * Save in cache.
160  */
161 int pl_bread(Hglob *g){
162         int n, c;
163         char err[1024];
164         if(g->hbufp==g->ehbuf){
165                 n=read(g->hfd, g->hbuf, NHBUF);
166                 if(n<=0){
167                         if(n<0){
168                                 snprint(err, sizeof(err), "%r reading %s", g->name);
169                                 pl_htmloutput(g, 1, err, 0);
170                         }
171                         g->heof=1;
172                         return EOF;
173                 }
174                 g->hbufp=g->hbuf;
175                 g->ehbuf=g->hbuf+n;
176         }
177         c=*g->hbufp++&255;
178         if(c=='\n') g->lineno++;
179         return c;
180 }
181 /*
182  * Read a character, translating \r\n, \n\r, \r and \n into \n
183  */
184 int pl_readc(Hglob *g){
185         int c;
186         static int peek=-1;
187         if(peek!=-1){
188                 c=peek;
189                 peek=-1;
190         }
191         else
192                 c=pl_bread(g);
193         if(c=='\r'){
194                 c=pl_bread(g);
195                 if(c!='\n') peek=c;
196                 return '\n';
197         }
198         if(c=='\n'){
199                 c=pl_bread(g);
200                 if(c!='\r') peek=c;
201                 return '\n';
202         }
203         return c;
204 }
205 void pl_putback(Hglob *g, int c){
206         if(g->npeekc==NPEEKC) htmlerror(g->name, g->lineno, "too much putback!");
207         else if(c!=EOF) g->peekc[g->npeekc++]=c;
208 }
209 int pl_nextc(Hglob *g){
210         int c;
211         int n;
212         Rune r;
213         char crune[UTFmax+1];
214         if(g->heof) return EOF;
215         if(g->npeekc!=0) return g->peekc[--g->npeekc];
216         c=pl_readc(g);
217         if(c=='<'){
218                 c=pl_readc(g);
219                 if(c=='/'){
220                         c=pl_readc(g);
221                         pl_putback(g, c);
222                         pl_putback(g, '/');
223                         if('a'<=c && c<='z' || 'A'<=c && c<='Z') return STAG;
224                         return '<';
225                 }
226                 pl_putback(g, c);
227                 if(c=='!' || 'a'<=c && c<='z' || 'A'<=c && c<='Z' || c=='?') return STAG;
228                 return '<';
229         }
230         if(c=='>') return ETAG;
231         if(c==EOF) return c;
232         for (n=1; n<=sizeof(crune); n++){
233                 crune[n-1]=c;
234                 if(fullrune(crune, n)){
235                         chartorune(&r, crune);
236                         return r;
237                 }
238                 c=pl_readc(g);
239                 if(c==EOF)
240                         return EOF;
241         }
242         return c;
243 }
/*
 * Copy src into dst with leading white space and one level of
 * surrounding quotes removed.
 *
 *	dst	destination buffer
 *	src	NUL-terminated source; it is not modified
 *	len	size of dst in bytes, terminator included
 *
 * If the text starts with ' or " it is taken up to the LAST
 * occurrence of the same quote; an unmatched opening quote is simply
 * dropped.  At most len-1 bytes are copied and dst is always
 * terminated.  When len <= 0 nothing is written.  Returns dst.
 */
char *unquot(char *dst, char *src, int len){
	char *e;

	if(len<=0)
		return dst;	/* no room even for the terminator */
	e=0;
	/*
	 * strchr also matches the terminating NUL of its first
	 * argument, so the end of src has to be tested explicitly
	 */
	while(*src!='\0' && strchr("\n\r\t ", *src))
		src++;
	if(*src=='\'' || *src=='"'){
		e=strrchr(src+1, *src);
		src++;
	}
	if(e==0) e=strchr(src, 0);
	len--;
	if((e - src) < len)
		len=e-src;
	if(len>0) memmove(dst, src, len);
	dst[len]=0;
	return dst;
}
/* Return 1 if c is an ASCII letter or digit, 0 otherwise. */
int alnumchar(int c){
	if('0'<=c && c<='9')
		return 1;
	if('A'<=c && c<='Z')
		return 1;
	if('a'<=c && c<='z')
		return 1;
	return 0;
}
/* Return 1 if c may appear in an entity name: '#', a letter or a digit. */
int entchar(int c){
	if(c=='#')
		return 1;
	return alnumchar(c);
}
268
/*
 * Return a freshly allocated url if the text token looks like a
 * hyperlink, 0 otherwise.  Tokens starting with "http://" or
 * "https://" (any case) are returned as they are.  A token starting
 * with "www." gets "http://" prepended, provided the run of letters,
 * digits and dots after the prefix contains at least one dot and no
 * two dots in a row.
 */
char *linkify(char *s){
	int ndot, i;

	if(cistrncmp(s, "http://", 7)==0 || cistrncmp(s, "https://", 8)==0)
		return strdup(s);
	if(cistrncmp(s, "www.", 4)!=0)
		return 0;

	ndot=1;		/* the dot of "www." itself */
	for(i=4; s[i]!='\0' && (s[i]=='.' || alnumchar(s[i])); i++){
		if(s[i]!='.')
			continue;
		if(s[i-1]=='.')
			return 0;
		ndot++;
	}
	if(ndot<2)
		return 0;
	return smprint("http://%s", s);
}
292
293 /*
294  * remove entity references, in place.
295  * Potential bug:
296  *      This doesn't work if removing an entity reference can lengthen the string!
297  *      Fortunately, this doesn't happen.
298  */
299 void pl_rmentities(Hglob *g, char *s){
300         char *t, *u, c, svc;
301         Entity *ep;
302         Rune r;
303         t=s;
304         do{
305                 c=*s++;
306                 if(c=='&'
307                 && ((*s=='#' && strchr("0123456789Xx", s[1]))
308                   || 'a'<=*s && *s<='z'
309                   || 'A'<=*s && *s<='Z')){
310                         u=s;
311                         while(entchar(*s)) s++;
312                         svc=*s;
313                         *s = 0;
314                         if(svc==';') s++;
315                         if(strcmp(u, "lt") == 0)
316                                 *t++='<';
317                         else if(strcmp(u, "gt") == 0)
318                                 *t++='>';
319                         else if(strcmp(u, "quot") == 0)
320                                 *t++='"';
321                         else if(strcmp(u, "apos") == 0)
322                                 *t++='\'';
323                         else if(strcmp(u, "amp") == 0)
324                                 *t++='&';
325                         else {
326                                 if(svc==';') s--;
327                                 *s=svc;
328                                 *t++='&';
329                                 while(u<s)
330                                         *t++=*u++;
331                         }
332                 }       
333                 else *t++=c;
334         }while(c);
335 }
/*
 * Skip over white space: return a pointer to the first character
 * of s that is not a blank, tab, newline or carriage return.
 */
char *pl_white(char *s){
	for(;;s++){
		switch(*s){
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			continue;
		}
		return s;
	}
}
/*
 * Skip over HTML word: a letter followed by any number of letters,
 * digits, '-' and '.'.  Returns a pointer just past the word, or s
 * itself when s does not start with a letter.
 */
char *pl_word(char *s){
	int c;

	c=*s;
	if(!('a'<=c && c<='z') && !('A'<=c && c<='Z'))
		return s;
	for(s++;;s++){
		c=*s;
		if('a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9')
			continue;
		if(c=='-' || c=='.')
			continue;
		return s;
	}
}
/*
 * Skip to matching quote: s points at an opening quote character;
 * return a pointer to the next occurrence of that character, or to
 * the terminating NUL when there is none.
 */
char *pl_quote(char *s){
	char q;

	for(q=*s++; *s!='\0'; s++)
		if(*s==q)
			break;
	return s;
}
/* Delete every carriage return and newline from s, in place. */
void pl_dnl(char *s){
	char *w;

	w=s;
	while(*s!='\0'){
		if(*s=='\r' || *s=='\n'){
			s++;
			continue;
		}
		*w++=*s++;
	}
	*w='\0';
}
367 void pl_tagparse(Hglob *g, char *str){
368         char *s, *t, *name, c;
369         Pair *ap;
370         Tag *tagp;
371         g->tag=Tag_end;
372         ap=g->attr;
373         if(str[0]=='!'){        /* test should be strncmp(str, "!--", 3)==0 */
374                 g->tag=Tag_comment;
375                 ap->name=0;
376                 return;
377         }
378         if(str[0]=='/') str++;
379         name=str;
380         s=pl_word(str);
381         if(*s!=' ' && *s!='\n' && *s!='\t' && *s!='\0'){
382                 htmlerror(g->name, g->lineno, "bad tag name in %s", str);
383                 ap->name=0;
384                 return;
385         }
386         if(*s!='\0') *s++='\0';
387         for(t=name;t!=s;t++) if('A'<=*t && *t<='Z') *t+='a'-'A';
388         /*
389          * Binary search would be faster here
390          */
391         for(tagp=tag;tagp->name;tagp++) if(strcmp(name, tagp->name)==0) break;
392         g->tag=tagp-tag;
393         if(g->tag==Tag_end) htmlerror(g->name, g->lineno, "no tag %s", name);
394         for(;;){
395                 s=pl_white(s);
396                 if(*s=='\0'){
397                         ap->name=0;
398                         return;
399                 }
400                 ap->name=s;
401                 s=pl_word(s);
402                 t=pl_white(s);
403                 c=*t;
404                 *s='\0';
405                 for(s=ap->name;*s;s++) if('A'<=*s && *s<='Z') *s+='a'-'A';
406                 if(c=='='){
407                         s=pl_white(t+1);
408                         if(*s=='\'' || *s=='"'){
409                                 ap->value=s+1;
410                                 s=pl_quote(s);
411                                 if(*s=='\0'){
412                                         htmlerror(g->name, g->lineno,
413                                                 "No terminating quote in rhs of attribute %s",
414                                                 ap->name);
415                                         ap->name=0;
416                                         return;
417                                 }
418                                 *s++='\0';
419                                 pl_dnl(ap->value);
420                         }
421                         else{
422                                 /* read up to white space or > */
423                                 ap->value=s;
424                                 while(*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0') s++;
425                                 if(*s!='\0') *s++='\0';
426                         }
427                         pl_rmentities(g, ap->value);
428                 }
429                 else{
430                         if(c!='\0') s++;
431                         ap->value="";
432                 }
433                 if(ap==&g->attr[NATTR-1])
434                         htmlerror(g->name, g->lineno, "too many attributes!");
435                 else ap++;
436         }
437 }
438 int pl_getcomment(Hglob *g){
439         int c;
440         if((c=pl_nextc(g))=='-' && (c=pl_nextc(g))=='-'){
441                 /* <!-- eats everything until --> or EOF */
442                 for(;;){
443                         while((c=pl_nextc(g))!='-' && c!=EOF)
444                                 ;
445                         if(c==EOF)
446                                 break;
447                         if((c=pl_nextc(g))=='-'){
448                                 while((c=pl_nextc(g))=='-')
449                                         ;
450                                 if(c==ETAG || c==EOF)
451                                         break;
452                         }
453                 }
454         } else {
455                 /* <! eats everything until > or EOF */
456                 while(c!=ETAG && c!=EOF)
457                         c=pl_nextc(g);
458         }
459         if(c==EOF)
460                 htmlerror(g->name, g->lineno, "EOF in comment");
461         g->tag=Tag_comment;
462         g->attr->name=0;
463         g->token[0]='\0';
464         return TAG;
465 }
466 int lrunetochar(char *p, int v)
467 {
468         Rune r;
469
470         r=v;
471         return runetochar(p, &r);
472 }
473
474 /*
475  * Read a start or end tag -- the caller has read the initial <
476  */
477 int pl_gettag(Hglob *g){
478         char *tokp;
479         int c;
480         tokp=g->token;
481         if((c=pl_nextc(g))=='!' || c=='?')
482                 return pl_getcomment(g);
483         pl_putback(g, c);
484         while((c=pl_nextc(g))!=ETAG && c!=EOF)
485                 if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
486         *tokp='\0';
487         if(c==EOF) htmlerror(g->name, g->lineno, "EOF in tag");
488         pl_tagparse(g, g->token);
489         if(g->token[0]!='/') return TAG;
490         if(g->attr[0].name!=0)
491                 htmlerror(g->name, g->lineno, "end tag should not have attributes");
492         return ENDTAG;
493 }
494 /*
495  * The next token is a tag, an end tag or a sequence of
496  * non-white characters.
497  * If inside <pre>, newlines are converted to <br> and spaces are preserved.
498  * Otherwise, spaces and newlines are noted and discarded.
499  */
500 int pl_gettoken(Hglob *g){
501         char *tokp;
502         int c;
503         if(g->state->pre) switch(c=pl_nextc(g)){
504         case STAG: return pl_gettag(g);
505         case EOF: return EOF;
506         case '\n':
507                 pl_tagparse(g, "br");
508                 return TAG;
509         default:
510                 tokp=g->token;
511                 while(c=='\t'){
512                         if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
513                         c=pl_nextc(g);
514                 }
515                 while(c!='\t' && c!='\n' && c!=STAG && c!=EOF){
516                         if(c==ETAG) c='>';
517                         if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
518                         c=pl_nextc(g);
519                 }
520                 *tokp='\0';
521                 pl_rmentities(g, g->token);
522                 pl_putback(g, c);
523                 g->nsp=0;
524                 g->spacc=0;
525                 return TEXT;
526         }
527         while((c=pl_nextc(g))==' ' || c=='\t' || c=='\n')
528                 if(g->spacc!=-1)
529                         g->spacc++;
530         switch(c){
531         case STAG: return pl_gettag(g);
532         case EOF: return EOF;
533         default:
534                 tokp=g->token;
535                 do{
536                         if(c==ETAG) c='>';
537                         if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
538                         c=pl_nextc(g);
539                 }while(c!=' ' && c!='\t' && c!='\n' && c!=STAG && c!=EOF);
540                 *tokp='\0';
541                 pl_rmentities(g, g->token);
542                 pl_putback(g, c);
543                 g->nsp=g->spacc;
544                 g->spacc=0;
545                 return TEXT;
546         }
547 }
548 char *pl_getattr(Pair *attr, char *name){
549         for(;attr->name;attr++)
550                 if(strcmp(attr->name, name)==0)
551                         return attr->value;
552         return 0;
553 }
554 int pl_hasattr(Pair *attr, char *name){
555         for(;attr->name;attr++)
556                 if(strcmp(attr->name, name)==0)
557                         return 1;
558         return 0;
559 }
560 void plaintext(Hglob *g){
561         char line[NLINE];
562         char *lp, *elp;
563         int c;
564         g->state->font=CWIDTH;
565         g->state->size=NORMAL;
566         elp=&line[NLINE-UTFmax-1];
567         lp=line;
568         for(;;){
569                 c=pl_readc(g);
570                 if(c==EOF) break;
571                 if(c=='\n' || lp>=elp){
572                         *lp='\0';
573                         g->linebrk=1;
574                         pl_htmloutput(g, 0, line, 0);
575                         lp=line;
576                 }
577                 if(c=='\t'){
578                         do *lp++=' '; while(lp<elp && utfnlen(line, lp-line)%8!=0);
579                 }
580                 else if(c!='\n')
581                         lp += lrunetochar(lp, c);
582         }
583         if(lp!=line){
584                 *lp='\0';
585                 g->linebrk=1;
586                 pl_htmloutput(g, 0, line, 0);
587         }
588 }
589 void plrdplain(char *name, int fd, Www *dst){
590         Hglob g;
591         g.state=g.stack;
592         g.state->tag=Tag_html;
593         g.state->font=CWIDTH;
594         g.state->size=NORMAL;
595         g.state->pre=0;
596         g.state->image[0]=0;
597         g.state->link[0]=0;
598         g.state->name[0]=0;
599         g.state->margin=0;
600         g.state->indent=20;
601         g.state->ismap=0;
602         g.dst=dst;
603         g.hfd=fd;
604         g.name=name;
605         g.ehbuf=g.hbufp=g.hbuf;
606         g.npeekc=0;
607         g.heof=0;
608         g.lineno=1;
609         g.linebrk=1;
610         g.para=0;
611         g.text=dst->title;
612         g.tp=g.text;
613         g.etext=g.text+NTITLE-1;
614         g.spacc=0;
615         g.form=0;
616         strncpy(g.text, name, NTITLE);
617         plaintext(&g);
618         finish(dst);
619 }
620 void plrdhtml(char *name, int fd, Www *dst){
621         Stack *sp;
622         char buf[20];
623         char *str;
624         Hglob g;
625         int t;
626         int tagerr;
627
628         g.state=g.stack;
629         g.state->tag=Tag_html;
630         g.state->font=ROMAN;
631         g.state->size=NORMAL;
632         g.state->pre=0;
633         g.state->image[0]=0;
634         g.state->link[0]=0;
635         g.state->name[0]=0;
636         g.state->margin=0;
637         g.state->indent=25;
638         g.state->ismap=0;
639         g.state->width=0;
640         g.state->height=0;
641         g.dst=dst;
642         g.hfd=fd;
643         g.name=name;
644         g.ehbuf=g.hbufp=g.hbuf;
645         g.npeekc=0;
646         g.heof=0;
647         g.lineno=1;
648         g.linebrk=1;
649         g.para=0;
650         g.text=dst->title;
651         g.tp=g.text;
652         g.etext=g.text+NTITLE-1;
653         dst->title[0]='\0';
654         g.spacc=0;
655         g.form=0;
656
657         for(;;) switch(pl_gettoken(&g)){
658         case TAG:
659                 switch(tag[g.tag].action){
660                 case OPTEND:
661                         for(sp=g.state;sp!=g.stack && sp->tag!=g.tag;--sp);
662                         if(sp->tag!=g.tag)
663                                 pl_pushstate(&g, g.tag);
664                         else
665                                 for(;g.state!=sp;--g.state)
666                                         if(tag[g.state->tag].action!=OPTEND)
667                                                 htmlerror(g.name, g.lineno,
668                                                         "end tag </%s> missing",
669                                                         tag[g.state->tag].name);
670                         break;
671                 case END:
672                         pl_pushstate(&g, g.tag);
673                         break;
674                 }
675                 if(str=pl_getattr(g.attr, "id")){
676                         char swap[NNAME];
677
678                         strncpy(swap, g.state->name, sizeof(swap));
679                         strncpy(g.state->name, str, sizeof(g.state->name));
680                         pl_htmloutput(&g, 0, "", 0);
681                         strncpy(g.state->name, swap, sizeof(g.state->name));
682                 }
683                 switch(g.tag){
684                 default:
685                         htmlerror(g.name, g.lineno,
686                                 "unimplemented tag <%s>", tag[g.tag].name);
687                         break;
688                 case Tag_end:   /* unrecognized start tag */
689                         break;
690                 case Tag_img:
691                         if(str=pl_getattr(g.attr, "src"))
692                                 strncpy(g.state->image, str, sizeof(g.state->image));
693                         g.state->ismap=pl_hasattr(g.attr, "ismap");
694                         if(str=pl_getattr(g.attr, "width"))
695                                 g.state->width = strtolength(&g, HORIZ, str);
696                         if(str=pl_getattr(g.attr, "height"))
697                                 g.state->height = strtolength(&g, VERT, str);
698                         str=pl_getattr(g.attr, "alt");
699                         if(str==0){
700                                 if(g.state->image[0])
701                                         str=g.state->image;
702                                 else
703                                         str="[[image]]";
704                         }
705                         pl_htmloutput(&g, 0, str, 0);
706                         g.state->image[0]=0;
707                         g.state->ismap=0;
708                         g.state->width=0;
709                         g.state->height=0;
710                         break;
711                 case Tag_plaintext:
712                         g.spacc=0;
713                         plaintext(&g);
714                         break;
715                 case Tag_comment:
716                 case Tag_html:
717                 case Tag_link:
718                 case Tag_nextid:
719                 case Tag_table:
720                         break;
721                 case Tag_tr:
722                         g.spacc=0;
723                         g.linebrk=1;
724                         break;
725                 case Tag_td:
726                         g.spacc++;
727                         break;
728                 case Tag_a:
729                         if(str=pl_getattr(g.attr, "href"))
730                                 strncpy(g.state->link, str, sizeof(g.state->link));
731                         if(str=pl_getattr(g.attr, "name")){
732                                 strncpy(g.state->name, str, sizeof(g.state->name));
733                                 pl_htmloutput(&g, 0, "", 0);
734                         }
735                         break;
736                 case Tag_meta:
737                         if((str=pl_getattr(g.attr, "http-equiv"))==0)
738                                 break;
739                         if(cistrcmp(str, "refresh"))
740                                 break;
741                         if((str=pl_getattr(g.attr, "content"))==0)
742                                 break;
743                         if((str=strchr(str, '='))==0)
744                                 break;
745                         str++;
746                         str=unquot(g.state->link, str, sizeof(g.state->link));
747                         pl_htmloutput(&g, 0, "refresh: ", 0);
748                         pl_htmloutput(&g, 0, str, 0);
749                         g.state->link[0]=0;
750                         g.linebrk=1;
751                         g.spacc=0;
752                         break;
753                 case Tag_source:
754                 case Tag_video:
755                 case Tag_audio:
756                 case Tag_embed:
757                 case Tag_frame:
758                 case Tag_iframe:
759                         if(str=pl_getattr(g.attr, "src"))
760                                 strncpy(g.state->link, str, sizeof(g.state->link));
761                         if(str=pl_getattr(g.attr, "name"))
762                                 strncpy(g.state->name, str, sizeof(g.state->name));
763                         else
764                                 str = g.state->link;
765                         pl_htmloutput(&g, 0, tag[g.tag].name, 0);
766                         pl_htmloutput(&g, 0, ": ", 0);
767                         pl_htmloutput(&g, 0, str, 0);
768                         g.state->link[0]=0;
769                         g.state->name[0]=0;
770                         g.linebrk=1;
771                         g.spacc=0;
772                         break;
773                 case Tag_address:
774                         g.spacc=0;
775                         g.linebrk=1;
776                         g.state->font=ROMAN;
777                         g.state->size=NORMAL;
778                         g.state->margin=300;
779                         g.state->indent=50;
780                         break;
781                 case Tag_b:
782                 case Tag_strong:
783                         g.state->font=BOLD;
784                         break;
785                 case Tag_blockquot:
786                         g.spacc=0;
787                         g.linebrk=1;
788                         g.state->margin+=50;
789                         g.state->indent=20;
790                         break;
791                 case Tag_body:
792                         break;
793                 case Tag_head:
794                         g.state->font=ROMAN;
795                         g.state->size=NORMAL;
796                         g.state->margin=0;
797                         g.state->indent=20;
798                         g.spacc=0;
799                         break;
800                 case Tag_div:
801                 case Tag_br:
802                         g.spacc=0;
803                         g.linebrk=1;
804                         break;
805                 case Tag_center:
806                         /* more to come */
807                         break;
808                 case Tag_cite:
809                 case Tag_acronym:
810                         g.state->font=ITALIC;
811                         g.state->size=NORMAL;
812                         break;
813                 case Tag_code:
814                         g.state->font=CWIDTH;
815                         g.state->size=NORMAL;
816                         break;
817                 case Tag_dd:
818                         g.linebrk=1;
819                         g.state->indent=0;
820                         g.state->font=ROMAN;
821                         g.spacc=0;
822                         break;
823                 case Tag_dfn:
824                         htmlerror(g.name, g.lineno, "<dfn> deprecated");
825                 case Tag_abbr:
826                         g.state->font=BOLD;
827                         g.state->size=NORMAL;
828                         break;
829                 case Tag_dl:
830                         g.state->font=BOLD;
831                         g.state->size=NORMAL;
832                         g.state->margin+=40;
833                         g.spacc=0;
834                         break;
835                 case Tag_dt:
836                         g.para=1;
837                         g.state->indent=-40;
838                         g.state->font=BOLD;
839                         g.spacc=0;
840                         break;
841                 case Tag_font:
842                         /* more to come */
843                         break;
844                 case Tag_u:
845                         htmlerror(g.name, g.lineno, "<u> deprecated");
846                 case Tag_em:
847                 case Tag_i:
848                 case Tag_var:
849                         g.state->font=ITALIC;
850                         break;
851                 case Tag_h1:
852                         g.linebrk=1;
853                         g.state->font=BOLD;
854                         g.state->size=ENORMOUS;
855                         g.state->margin+=100;
856                         g.spacc=0;
857                         break;
858                 case Tag_h2:
859                         pl_linespace(&g);
860                         g.state->font=BOLD;
861                         g.state->size=ENORMOUS;
862                         g.spacc=0;
863                         break;
864                 case Tag_h3:
865                         g.linebrk=1;
866                         pl_linespace(&g);
867                         g.state->font=ITALIC;
868                         g.state->size=ENORMOUS;
869                         g.state->margin+=20;
870                         g.spacc=0;
871                         break;
872                 case Tag_h4:
873                         pl_linespace(&g);
874                         g.state->font=BOLD;
875                         g.state->size=LARGE;
876                         g.state->margin+=10;
877                         g.spacc=0;
878                         break;
879                 case Tag_h5:
880                         pl_linespace(&g);
881                         g.state->font=ITALIC;
882                         g.state->size=LARGE;
883                         g.state->margin+=10;
884                         g.spacc=0;
885                         break;
886                 case Tag_h6:
887                         pl_linespace(&g);
888                         g.state->font=BOLD;
889                         g.state->size=LARGE;
890                         g.spacc=0;
891                         break;
892                 case Tag_hr:
893                         g.spacc=0;
894                         plrtbitmap(&g.dst->text, 1000000, g.state->margin, hrule, 0, 0);
895                         break;
896                 case Tag_key:
897                         htmlerror(g.name, g.lineno, "<key> deprecated");
898                 case Tag_kbd:
899                         g.state->font=CWIDTH;
900                         break;
901                 case Tag_dir:
902                 case Tag_menu:
903                 case Tag_ol:
904                 case Tag_ul:
905                         g.state->number=0;
906                         g.linebrk=1;
907                         g.state->margin+=25;
908                         g.state->indent=-25;
909                         g.spacc=0;
910                         break;
911                 case Tag_li:
912                         g.spacc=0;
913                         switch(g.state->tag){
914                         default:
915                                 htmlerror(g.name, g.lineno, "can't have <li> in <%s>",
916                                         tag[g.state->tag].name);
917                         case Tag_dir:   /* supposed to be multi-columns, can't do! */
918                         case Tag_menu:
919                                 g.linebrk=1;
920                                 break;
921                         case Tag_ol:
922                                 g.para=1;
923                                 snprint(buf, sizeof(buf), "%2d  ", ++g.state->number);
924                                 pl_htmloutput(&g, 0, buf, 0);
925                                 break;
926                         case Tag_ul:
927                                 g.para=0;
928                                 g.linebrk=0;
929                                 g.spacc=-1;
930                                 plrtbitmap(&g.dst->text, 100000,
931                                         g.state->margin+g.state->indent, bullet, 0, 0);
932                                 break;
933                         }
934                         break;
935                 case Tag_p:
936                         pl_linespace(&g);
937                         g.linebrk=1;
938                         g.spacc=0;
939                         break;
940                 case Tag_listing:
941                 case Tag_xmp:
942                         htmlerror(g.name, g.lineno, "<%s> deprecated", tag[g.tag].name);
943                 case Tag_pre:
944                 case Tag_samp:
945                         g.state->indent=0;
946                         g.state->pre=1;
947                         g.state->font=CWIDTH;
948                         g.state->size=NORMAL;
949                         pl_linespace(&g);
950                         break;
951                 case Tag_tt:
952                         g.state->font=CWIDTH;
953                         g.state->size=NORMAL;
954                         break;
955                 case Tag_title:
956                         g.text=dst->title+strlen(dst->title);
957                         g.tp=g.text;
958                         g.etext=dst->title+NTITLE-1;
959                         break;
960                 case Tag_form:
961                 case Tag_input:
962                 case Tag_button:
963                 case Tag_select:
964                 case Tag_option:
965                 case Tag_textarea:
966                 case Tag_isindex:
967                         rdform(&g);
968                         break;
969                 case Tag_script:
970                 case Tag_object:
971                 case Tag_applet:
972                 case Tag_style:
973                         /*
974                          * ignore the content of these tags, eat tokens until we
975                          * reach a matching endtag.
976                          */
977                         t = g.tag;
978                         for(;;){
979                                 switch(pl_gettoken(&g)){
980                                 default:
981                                         continue;
982                                 case ENDTAG:
983                                         if(g.tag != t)
984                                                 continue;
985                                 case EOF:
986                                         break;
987                                 }
988                                 break;
989                         }
990                         break;
991                 }
992                 break;
993
994         case ENDTAG:
995                 /*
996                  * If the end tag doesn't match the top, we try to uncover a match
997                  * on the stack.
998                  */
999                 if(g.state->tag!=g.tag){
1000                         tagerr=0;
1001                         for(sp=g.state;sp!=g.stack;--sp){
1002                                 if(sp->tag==g.tag)
1003                                         break;
1004                                 if(tag[g.state->tag].action!=OPTEND) tagerr++;
1005                         }
1006                         if(sp==g.stack){
1007                                 if(tagerr)
1008                                         htmlerror(g.name, g.lineno,
1009                                                 "end tag mismatch <%s>...</%s>, ignored",
1010                                                 tag[g.state->tag].name, tag[g.tag].name);
1011                         }
1012                         else{
1013                                 if(tagerr)
1014                                         htmlerror(g.name, g.lineno,
1015                                                 "end tag mismatch <%s>...</%s>, "
1016                                                 "intervening tags popped",
1017                                                 tag[g.state->tag].name, tag[g.tag].name);
1018                                 g.state=sp-1;
1019                         }
1020                 }
1021                 else if(g.state==g.stack)
1022                         htmlerror(g.name, g.lineno, "end tag </%s> at stack bottom",
1023                                 tag[g.tag].name);
1024                 else
1025                         --g.state;
1026                 switch(g.tag){
1027                 case Tag_select:
1028                 case Tag_form:
1029                 case Tag_textarea:
1030                         endform(&g);
1031                         break;
1032                 case Tag_h1:
1033                 case Tag_h2:
1034                 case Tag_h3:
1035                 case Tag_h4:
1036                         pl_linespace(&g);
1037                         break;
1038                 case Tag_address:
1039                 case Tag_blockquot:
1040                 case Tag_body:
1041                 case Tag_dir:
1042                 case Tag_dl:
1043                 case Tag_dt:
1044                 case Tag_h5:
1045                 case Tag_h6:
1046                 case Tag_listing:
1047                 case Tag_menu:
1048                 case Tag_ol:
1049                 case Tag_samp:
1050                 case Tag_title:
1051                 case Tag_ul:
1052                 case Tag_xmp:
1053                 case Tag_table:
1054                         g.linebrk=1;
1055                         break;
1056                 case Tag_pre:
1057                         pl_linespace(&g);
1058                         break;
1059                 }
1060                 break;
1061         case TEXT:
1062                 if(g.state->link[0]==0 && (str = linkify(g.token))){
1063                         strncpy(g.state->link, str, sizeof(g.state->link));
1064                         pl_htmloutput(&g, g.nsp, g.token, 0);
1065                         g.state->link[0] = 0;
1066                         free(str);
1067                 } else
1068                         pl_htmloutput(&g, g.nsp, g.token, 0);
1069                 break;
1070         case EOF:
1071                 for(;g.state!=g.stack;--g.state)
1072                         if(tag[g.state->tag].action!=OPTEND)
1073                                 htmlerror(g.name, g.lineno,
1074                                         "missing </%s> at EOF", tag[g.state->tag].name);
1075                 *g.tp='\0';
1076                 update(dst);
1077                 getpix(dst->text, dst);
1078                 finish(dst);
1079                 return;
1080         }
1081 }