]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/mothra/rdhtml.c
mothra: add <strike> support
[plan9front.git] / sys / src / cmd / mothra / rdhtml.c
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <event.h>
5 #include <panel.h>
6 #include "mothra.h"
7 #include "html.h"
8 #include "rtext.h"
9
10 typedef struct Fontdata Fontdata;
11 struct Fontdata{
12         char *name;
13         Font *font;
14         int space;
15 }fontlist[4][4]={
16         "dejavusans/unicode.12", 0, 0,
17         "dejavusans/unicode.12", 0, 0,
18         "dejavusans/unicode.14", 0, 0,
19         "dejavusans/unicode.16", 0, 0,
20
21         "dejavusansit/unicode.12", 0, 0,
22         "dejavusansit/unicode.12", 0, 0,
23         "dejavusansit/unicode.14", 0, 0,
24         "dejavusansit/unicode.16", 0, 0,
25
26         "dejavusansbd/unicode.12", 0, 0,
27         "dejavusansbd/unicode.12", 0, 0,
28         "dejavusansbd/unicode.14", 0, 0,
29         "dejavusansbd/unicode.16", 0, 0,
30
31         "terminus/unicode.12", 0, 0,
32         "terminus/unicode.14", 0, 0,
33         "terminus/unicode.16", 0, 0,
34         "terminus/unicode.18", 0, 0,
35 };
36 Fontdata *pl_whichfont(int f, int s){
37         char name[NNAME];
38
39         assert(f >= 0 && f < 4);
40         assert(s >= 0 && s < 4);
41
42         if(fontlist[f][s].font==0){
43                 snprint(name, sizeof(name), "/lib/font/bit/%s.font", fontlist[f][s].name);
44                 fontlist[f][s].font=openfont(display, name);
45                 if(fontlist[f][s].font==0) fontlist[f][s].font=font;
46                 fontlist[f][s].space=stringwidth(fontlist[f][s].font, "0");
47         }
48         return &fontlist[f][s];
49         
50 }
/*
 * Load every entry of the font table, face by face.
 */
void getfonts(void){
	int i;

	for(i=0;i<4*4;i++)
		pl_whichfont(i/4, i%4);
}
57 void pl_pushstate(Hglob *g, int t){
58         ++g->state;
59         if(g->state==&g->stack[NSTACK]){
60                 htmlerror(g->name, g->lineno, "stack overflow at <%s>", tag[t].name);
61                 --g->state;
62         }
63         g->state[0]=g->state[-1];
64         g->state->tag=t;
65 }
66 void pl_linespace(Hglob *g){
67         plrtbitmap(&g->dst->text, 1000000, 0, linespace, 0, 0);
68         g->para=0;
69         g->linebrk=0;
70 }
71
72 int strtolength(Hglob *g, int dir, char *str){
73         double f;
74         Point p;
75
76         f = atof(str);
77         if(cistrstr(str, "%"))
78                 return 0;
79         if(cistrstr(str, "em")){
80                 p=stringsize(pl_whichfont(g->state->font, g->state->size)->font, "M");
81                 return floor(f*((dir==HORIZ) ? p.x : p.y));
82         }
83         return floor(f);
84 }
85
86 void pl_htmloutput(Hglob *g, int nsp, char *s, Field *field){
87         Fontdata *f;
88         int space, indent, flags;
89         Action *ap;
90         if(g->state->tag==Tag_title
91 /*      || g->state->tag==Tag_textarea */
92         || g->state->tag==Tag_select){
93                 if(s){
94                         if(g->tp!=g->text && g->tp!=g->etext && g->tp[-1]!=' ')
95                                 *g->tp++=' ';
96                         while(g->tp!=g->etext && *s) *g->tp++=*s++;
97                         if(g->state->tag==Tag_title) g->dst->changed=1;
98                         *g->tp='\0';
99                 }
100                 return;
101         }
102         f=pl_whichfont(g->state->font, g->state->size);
103         space=f->space;
104         indent=g->state->margin;
105         if(g->para){
106                 space=1000000;
107                 indent+=g->state->indent;
108         }
109         else if(g->linebrk)
110                 space=1000000;
111         else if(nsp<=0)
112                 space=0;
113         if(g->state->image[0]==0 && g->state->link[0]==0 && g->state->name[0]==0 && field==0)
114                 ap=0;
115         else{
116                 ap=emalloc(sizeof(Action));
117                 if(g->state->image[0])
118                         ap->image = strdup(g->state->image);
119                 if(g->state->link[0])
120                         ap->link = strdup(g->state->link);
121                 if(g->state->name[0])
122                         ap->name = strdup(g->state->name);
123                 ap->ismap=g->state->ismap;
124                 ap->width=g->state->width;
125                 ap->height=g->state->height;
126                 ap->field=field;
127         }
128         if(space<0) space=0;
129         if(indent<0) indent=0;
130         if(g->state->pre && s[0]=='\t'){
131                 space=0;
132                 while(s[0]=='\t'){
133                         space++;
134                         s++;
135                 }
136                 space=PL_TAB|space;
137                 if(g->linebrk){
138                         indent=space;
139                         space=1000000;
140                 }
141         }
142         flags = 0;
143         if(g->state->link[0])
144                 flags |= PL_HOT;
145         if(g->state->strike)
146                 flags |= PL_STR;
147         plrtstr(&g->dst->text, space, indent, f->font, strdup(s), flags, ap);
148         g->para=0;
149         g->linebrk=0;
150         g->dst->changed=1;
151 }
152
153 /*
154  * Buffered read, no translation
155  * Save in cache.
156  */
157 int pl_bread(Hglob *g){
158         int n, c;
159         char err[1024];
160         if(g->hbufp==g->ehbuf){
161                 n=read(g->hfd, g->hbuf, NHBUF);
162                 if(n<=0){
163                         if(n<0){
164                                 snprint(err, sizeof(err), "%r reading %s", g->name);
165                                 pl_htmloutput(g, 1, err, 0);
166                         }
167                         g->heof=1;
168                         return EOF;
169                 }
170                 g->hbufp=g->hbuf;
171                 g->ehbuf=g->hbuf+n;
172         }
173         c=*g->hbufp++&255;
174         if(c=='\n') g->lineno++;
175         return c;
176 }
177 /*
178  * Read a character, translating \r\n, \n\r, \r and \n into \n
179  * convert to runes.
180  */
181 int pl_readc(Hglob *g){
182         static int peek=-1;
183         char crune[UTFmax+1];
184         int c, n;
185         Rune r;
186
187         if(peek!=-1){
188                 c=peek;
189                 peek=-1;
190         }
191         else
192                 c=pl_bread(g);
193         if(c=='\r'){
194                 c=pl_bread(g);
195                 if(c!='\n') peek=c;
196                 return '\n';
197         }
198         if(c=='\n'){
199                 c=pl_bread(g);
200                 if(c!='\r') peek=c;
201                 return '\n';
202         }
203
204         if(c < Runeself)
205                 return c;
206
207         crune[0]=c;
208         for (n=1; n<=sizeof(crune); n++){
209                 if(fullrune(crune, n)){
210                         chartorune(&r, crune);
211                         return r;
212                 }
213                 c=pl_bread(g);
214                 if(c==EOF)
215                         return EOF;
216                 crune[n]=c;
217         }
218         return c;
219 }
220 void pl_putback(Hglob *g, int c){
221         if(g->npeekc==NPEEKC) htmlerror(g->name, g->lineno, "too much putback!");
222         else if(c!=EOF) g->peekc[g->npeekc++]=c;
223 }
224 int pl_nextc(Hglob *g){
225         int c;
226
227         if(g->heof) return EOF;
228         if(g->npeekc!=0) return g->peekc[--g->npeekc];
229         c=pl_readc(g);
230         if(c=='<'){
231                 c=pl_readc(g);
232                 if(c=='/'){
233                         c=pl_readc(g);
234                         pl_putback(g, c);
235                         pl_putback(g, '/');
236                         if('a'<=c && c<='z' || 'A'<=c && c<='Z') return STAG;
237                         return '<';
238                 }
239                 pl_putback(g, c);
240                 if(c=='!' || 'a'<=c && c<='z' || 'A'<=c && c<='Z' || c=='?') return STAG;
241                 return '<';
242         }
243         if(c=='>') return ETAG;
244         return c;
245 }
246
/*
 * Copy src into dst (a buffer of len bytes) with leading white
 * space and one level of surrounding quotes removed.
 * If src starts with ' or ", the copy ends at the last occurrence
 * of that quote; an unterminated quote copies to the end of src.
 * The result is truncated to len-1 bytes and always terminated.
 * With len<=0 there is no room even for the terminator, so dst is
 * left untouched.  Returns dst.
 */
char *unquot(char *dst, char *src, int len){
	char *end;
	int n;

	if(len<=0)
		return dst;

	while(*src && strchr(" \t\r\n", *src))
		src++;

	end=0;
	if(*src=='\'' || *src=='"'){
		end=strrchr(src+1, *src);
		src++;
	}
	if(end==0)
		end=strchr(src, 0);

	n=len-1;
	if((end - src) < n)
		n=end-src;
	if(n>0)
		memmove(dst, src, n);
	dst[n]=0;
	return dst;
}
/* true for the ASCII letters and digits */
int alnumchar(int c){
	if('0'<=c && c<='9')
		return 1;
	if('a'<=c && c<='z')
		return 1;
	return 'A'<=c && c<='Z';
}
/* true for characters that may appear in an entity name: '#', letters, digits */
int entchar(int c){
	if(c=='#')
		return 1;
	return alnumchar(c);
}
271
/*
 * Return a url if the text token looks like a hyperlink, else 0.
 *
 * Tokens starting with http:// or https:// (any case) are returned
 * as a copy.  Tokens starting with www. get "http://" prepended,
 * provided the alphanumeric run after "www." contains at least one
 * more '.' and never two dots in a row.
 * A nil or empty s is never a link.
 * The result is allocated with strdup/smprint; the caller frees it.
 */
char *linkify(char *s){
	int ndot, i;

	if(s == 0 || s[0] == 0)
		return 0;
	if(cistrncmp(s, "http://", 7) == 0)
		return strdup(s);
	if(cistrncmp(s, "https://", 8) == 0)
		return strdup(s);
	if(cistrncmp(s, "www.", 4) != 0)
		return 0;

	/* the dot of "www." counts as the first one */
	ndot = 1;
	for(i = 4; s[i]; i++){
		if(s[i] == '.'){
			if(s[i-1] == '.')
				return 0;
			ndot++;
		} else if(!alnumchar(s[i]))
			break;
	}
	if(ndot >= 2)
		return smprint("http://%s", s);
	return 0;
}
297
298 /*
299  * remove entity references, in place.
300  * Potential bug:
301  *      This doesn't work if removing an entity reference can lengthen the string!
302  *      Fortunately, this doesn't happen.
303  */
304 void pl_rmentities(Hglob *g, char *s){
305         char *t, *u, c, svc;
306         Entity *ep;
307         Rune r;
308         t=s;
309         do{
310                 c=*s++;
311                 if(c=='&'
312                 && ((*s=='#' && strchr("0123456789Xx", s[1]))
313                   || 'a'<=*s && *s<='z'
314                   || 'A'<=*s && *s<='Z')){
315                         u=s;
316                         while(entchar(*s)) s++;
317                         svc=*s;
318                         *s = 0;
319                         if(svc==';') s++;
320                         if(strcmp(u, "lt") == 0)
321                                 *t++='<';
322                         else if(strcmp(u, "gt") == 0)
323                                 *t++='>';
324                         else if(strcmp(u, "quot") == 0)
325                                 *t++='"';
326                         else if(strcmp(u, "apos") == 0)
327                                 *t++='\'';
328                         else if(strcmp(u, "amp") == 0)
329                                 *t++='&';
330                         else {
331                                 if(svc==';') s--;
332                                 *s=svc;
333                                 *t++='&';
334                                 while(u<s)
335                                         *t++=*u++;
336                         }
337                 }       
338                 else *t++=c;
339         }while(c);
340 }
/*
 * Skip over white space: blank, tab, newline and carriage return.
 * Returns a pointer to the first character that is none of these.
 */
char *pl_white(char *s){
	for(;;s++){
		switch(*s){
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			continue;
		}
		return s;
	}
}
/*
 * Skip over HTML word: a letter followed by any number of
 * letters, digits, '-', '.' and ':'.  If s does not start with
 * a letter it is returned unchanged.
 */
char *pl_word(char *s){
	if(!('a'<=*s && *s<='z') && !('A'<=*s && *s<='Z'))
		return s;
	for(s++;;s++){
		if('a'<=*s && *s<='z')
			continue;
		if('A'<=*s && *s<='Z')
			continue;
		if('0'<=*s && *s<='9')
			continue;
		if(*s=='-' || *s=='.' || *s==':')
			continue;
		break;
	}
	return s;
}
/*
 * Skip to matching quote.
 * s points at the opening quote character; the result points at
 * the next occurrence of that character, or at the terminating
 * NUL when there is none.
 */
char *pl_quote(char *s){
	char *p;

	for(p=s+1;*p!='\0' && *p!=*s;p++)
		;
	return p;
}
/*
 * Delete every carriage return and newline from s, in place.
 */
void pl_dnl(char *s){
	char *dst;

	dst=s;
	while(*s){
		if(*s!='\r' && *s!='\n')
			*dst++=*s;
		s++;
	}
	*dst='\0';
}
373 void pl_tagparse(Hglob *g, char *str){
374         char *s, *t, *name, c;
375         Pair *ap;
376         Tag *tagp;
377         g->tag=Tag_end;
378         ap=g->attr;
379         if(str[0]=='!'){        /* test should be strncmp(str, "!--", 3)==0 */
380                 g->tag=Tag_comment;
381                 ap->name=0;
382                 return;
383         }
384         if(str[0]=='/') str++;
385         name=str;
386         s=pl_word(str);
387         if(*s!='/' && *s!=' ' && *s!='\n' && *s!='\t' && *s!='\0'){
388                 htmlerror(g->name, g->lineno, "bad tag name in %s", str);
389                 ap->name=0;
390                 return;
391         }
392         if(*s!='\0') *s++='\0';
393         for(t=name;t!=s;t++) if('A'<=*t && *t<='Z') *t+='a'-'A';
394         /*
395          * Binary search would be faster here
396          */
397         for(tagp=tag;tagp->name;tagp++) if(strcmp(name, tagp->name)==0) break;
398         g->tag=tagp-tag;
399         if(g->tag==Tag_end) htmlerror(g->name, g->lineno, "no tag %s", name);
400         for(;;){
401                 s=pl_white(s);
402                 if(*s=='\0'){
403                         ap->name=0;
404                         return;
405                 }
406                 ap->name=s;
407                 s=pl_word(s);
408                 t=pl_white(s);
409                 c=*t;
410                 *s='\0';
411                 for(s=ap->name;*s;s++) if('A'<=*s && *s<='Z') *s+='a'-'A';
412                 if(c=='='){
413                         s=pl_white(t+1);
414                         if(*s=='\'' || *s=='"'){
415                                 ap->value=s+1;
416                                 s=pl_quote(s);
417                                 if(*s=='\0'){
418                                         htmlerror(g->name, g->lineno,
419                                                 "No terminating quote in rhs of attribute %s",
420                                                 ap->name);
421                                         ap->name=0;
422                                         return;
423                                 }
424                                 *s++='\0';
425                                 pl_dnl(ap->value);
426                         }
427                         else{
428                                 /* read up to white space or > */
429                                 ap->value=s;
430                                 while(*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0') s++;
431                                 if(*s!='\0') *s++='\0';
432                         }
433                         pl_rmentities(g, ap->value);
434                 }
435                 else{
436                         if(c!='\0') s++;
437                         ap->value="";
438                 }
439                 if(ap==&g->attr[NATTR-1])
440                         htmlerror(g->name, g->lineno, "too many attributes!");
441                 else ap++;
442         }
443 }
444 int pl_getcomment(Hglob *g){
445         int c;
446         if((c=pl_nextc(g))=='-' && (c=pl_nextc(g))=='-'){
447                 /* <!-- eats everything until --> or EOF */
448                 for(;;){
449                         while((c=pl_nextc(g))!='-' && c!=EOF)
450                                 ;
451                         if(c==EOF)
452                                 break;
453                         if((c=pl_nextc(g))=='-'){
454                                 while((c=pl_nextc(g))=='-')
455                                         ;
456                                 if(c==ETAG || c==EOF)
457                                         break;
458                         }
459                 }
460         } else {
461                 /* <! eats everything until > or EOF */
462                 while(c!=ETAG && c!=EOF)
463                         c=pl_nextc(g);
464         }
465         if(c==EOF)
466                 htmlerror(g->name, g->lineno, "EOF in comment");
467         g->tag=Tag_comment;
468         g->attr->name=0;
469         g->token[0]='\0';
470         return TAG;
471 }
472
473 int lrunetochar(char *p, int v)
474 {
475         Rune r;
476
477         r=v;
478         return runetochar(p, &r);
479 }
480
481 int pl_getscript(Hglob *g){
482         char *tokp, *t;
483         int c;
484         tokp = g->token;
485         *tokp++ = '<';
486         while((c=pl_nextc(g)) != EOF){
487                 if(c==STAG || c==' ' || c=='\t' || c=='\n'){
488                         pl_putback(g, c);
489                         break;
490                 }
491                 if(c==ETAG) c='>';
492                 tokp += lrunetochar(tokp, c);
493                 if(c==0 || c=='>' || tokp >= &g->token[NTOKEN-UTFmax-1])
494                         break;
495         }
496         *tokp = '\0';
497         t = tag[g->state->tag].name;
498         if(g->token[1] == '/' && cistrncmp(g->token+2, t, strlen(t)) == 0){
499                 g->tag=g->state->tag;
500                 g->attr->name=0;
501                 return ENDTAG;
502         }
503         pl_rmentities(g, g->token);
504         g->nsp=g->spacc;
505         g->spacc=0;
506         return TEXT;
507 }
508
509 /*
510  * Read a start or end tag -- the caller has read the initial <
511  */
512 int pl_gettag(Hglob *g){
513         char *tokp;
514         int c, q;
515         if(g->state->isscript)
516                 return pl_getscript(g);
517         if((c=pl_nextc(g))=='!' || c=='?')
518                 return pl_getcomment(g);
519         pl_putback(g, c);
520         q = 0;
521         tokp=g->token;
522         while((c=pl_nextc(g))!=EOF){
523                 if(c == '=' && q == 0)
524                         q = '=';
525                 else if(c == '\'' || c == '"'){
526                         if(q == '=')
527                                 q = c;
528                         else if(q == c)
529                                 q = 0;
530                 }
531                 else if(c == ETAG && q != '\'' && q != '"')
532                         break;
533                 else if(q == '=' && c != ' ' && c != '\t' && c != '\n')
534                         q = 0;
535                 if(tokp < &g->token[NTOKEN-UTFmax-1])
536                         tokp += lrunetochar(tokp, c);
537         }
538         *tokp='\0';
539         if(c==EOF) htmlerror(g->name, g->lineno, "EOF in tag");
540         pl_tagparse(g, g->token);
541         if(g->token[0]!='/') return TAG;
542         if(g->attr[0].name!=0)
543                 htmlerror(g->name, g->lineno, "end tag should not have attributes");
544         return ENDTAG;
545 }
546 /*
547  * The next token is a tag, an end tag or a sequence of non-white
548  * characters. If inside <pre>, single newlines are converted to <br>,
549  * double newlines are converted to <p> and spaces are preserved.
550  * Otherwise, spaces and newlines are noted and discarded.
551  */
552 int pl_gettoken(Hglob *g){
553         char *tokp;
554         int c;
555         if(g->state->pre) switch(c=pl_nextc(g)){
556         case STAG: return pl_gettag(g);
557         case EOF: return EOF;
558         case '\n':
559                 switch(c=pl_nextc(g)){
560                 case '\n':
561                         pl_tagparse(g, "p");
562                         return TAG;
563                 default:
564                         pl_tagparse(g, "br");
565                         pl_putback(g, c);
566                         return TAG;
567                 }
568         default:
569                 tokp=g->token;
570                 while(c=='\t'){
571                         if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
572                         c=pl_nextc(g);
573                 }
574                 while(c!='\t' && c!='\n' && c!=STAG && c!=EOF){
575                         if(c==ETAG) c='>';
576                         if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
577                         c=pl_nextc(g);
578                 }
579                 *tokp='\0';
580                 pl_rmentities(g, g->token);
581                 pl_putback(g, c);
582                 g->nsp=0;
583                 g->spacc=0;
584                 return TEXT;
585         }
586         while((c=pl_nextc(g))==' ' || c=='\t' || c=='\n')
587                 if(g->spacc!=-1)
588                         g->spacc++;
589         switch(c){
590         case STAG: return pl_gettag(g);
591         case EOF: return EOF;
592         default:
593                 tokp=g->token;
594                 do{
595                         if(c==ETAG) c='>';
596                         if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
597                         c=pl_nextc(g);
598                 }while(c!=' ' && c!='\t' && c!='\n' && c!=STAG && c!=EOF);
599                 *tokp='\0';
600                 pl_rmentities(g, g->token);
601                 pl_putback(g, c);
602                 g->nsp=g->spacc;
603                 g->spacc=0;
604                 return TEXT;
605         }
606 }
607 char *pl_getattr(Pair *attr, char *name){
608         for(;attr->name;attr++)
609                 if(strcmp(attr->name, name)==0)
610                         return attr->value;
611         return 0;
612 }
613 int pl_hasattr(Pair *attr, char *name){
614         for(;attr->name;attr++)
615                 if(strcmp(attr->name, name)==0)
616                         return 1;
617         return 0;
618 }
619 void plaintext(Hglob *g){
620         char line[NLINE];
621         char *lp, *elp;
622         int c;
623         g->state->font=CWIDTH;
624         g->state->size=NORMAL;
625         elp=&line[NLINE-UTFmax-1];
626         lp=line;
627         for(;;){
628                 c=pl_readc(g);
629                 if(c==EOF) break;
630                 if(c=='\n' || lp>=elp){
631                         *lp='\0';
632                         g->linebrk=1;
633                         pl_htmloutput(g, 0, line, 0);
634                         lp=line;
635                 }
636                 if(c=='\t'){
637                         do *lp++=' '; while(lp<elp && utfnlen(line, lp-line)%8!=0);
638                 }
639                 else if(c!='\n')
640                         lp += lrunetochar(lp, c);
641         }
642         if(lp!=line){
643                 *lp='\0';
644                 g->linebrk=1;
645                 pl_htmloutput(g, 0, line, 0);
646         }
647 }
648 void plrdplain(char *name, int fd, Www *dst){
649         Hglob g;
650         g.state=g.stack;
651         g.state->tag=Tag_html;
652         g.state->font=CWIDTH;
653         g.state->size=NORMAL;
654         g.state->pre=0;
655         g.state->image[0]=0;
656         g.state->link[0]=0;
657         g.state->name[0]=0;
658         g.state->margin=0;
659         g.state->indent=20;
660         g.state->ismap=0;
661         g.dst=dst;
662         g.hfd=fd;
663         g.name=name;
664         g.ehbuf=g.hbufp=g.hbuf;
665         g.npeekc=0;
666         g.heof=0;
667         g.lineno=1;
668         g.linebrk=1;
669         g.para=0;
670         g.text=dst->title;
671         g.tp=g.text;
672         g.etext=g.text+NTITLE-1;
673         g.spacc=0;
674         g.form=0;
675         nstrcpy(g.text, name, NTITLE);
676         plaintext(&g);
677         finish(dst);
678 }
679 void plrdhtml(char *name, int fd, Www *dst){
680         int t, tagerr;
681         Stack *sp;
682         char buf[20];
683         char *str;
684         Hglob g;
685
686         g.state=g.stack;
687         g.state->tag=Tag_html;
688         g.state->font=ROMAN;
689         g.state->size=NORMAL;
690         g.state->pre=0;
691         g.state->image[0]=0;
692         g.state->link[0]=0;
693         g.state->name[0]=0;
694         g.state->margin=0;
695         g.state->indent=25;
696         g.state->ismap=0;
697         g.state->isscript=0;
698         g.state->strike=0;
699         g.state->width=0;
700         g.state->height=0;
701         g.dst=dst;
702         g.hfd=fd;
703         g.name=name;
704         g.ehbuf=g.hbufp=g.hbuf;
705         g.npeekc=0;
706         g.heof=0;
707         g.lineno=1;
708         g.linebrk=1;
709         g.para=0;
710         g.text=dst->title;
711         g.tp=g.text;
712         g.etext=g.text+NTITLE-1;
713         dst->title[0]='\0';
714         g.spacc=0;
715         g.form=0;
716
717         for(;;) switch(pl_gettoken(&g)){
718         case TAG:
719                 switch(tag[g.tag].action){
720                 case OPTEND:
721                         for(sp=g.state;sp!=g.stack && sp->tag!=g.tag;--sp);
722                         if(sp->tag!=g.tag)
723                                 pl_pushstate(&g, g.tag);
724                         else
725                                 for(;g.state!=sp;--g.state)
726                                         if(tag[g.state->tag].action!=OPTEND)
727                                                 htmlerror(g.name, g.lineno,
728                                                         "end tag </%s> missing",
729                                                         tag[g.state->tag].name);
730                         break;
731                 case END:
732                         pl_pushstate(&g, g.tag);
733                         break;
734                 }
735                 str=pl_getattr(g.attr, "id");
736                 if(str && *str){
737                         char swap[NNAME];
738
739                         nstrcpy(swap, g.state->name, sizeof(swap));
740                         nstrcpy(g.state->name, str, sizeof(g.state->name));
741                         pl_htmloutput(&g, 0, "", 0);
742                         nstrcpy(g.state->name, swap, sizeof(g.state->name));
743                 }
744                 switch(g.tag){
745                 default:
746                         htmlerror(g.name, g.lineno,
747                                 "unimplemented tag <%s>", tag[g.tag].name);
748                         break;
749                 case Tag_end:   /* unrecognized start tag */
750                         break;
751                 case Tag_img:
752                         str=pl_getattr(g.attr, "src");
753                         if(str && *str)
754                                 nstrcpy(g.state->image, str, sizeof(g.state->image));
755                         else {
756                                 Pair *a;
757
758                                 /*
759                                  * hack to emulate javascript that rewrites some attribute
760                                  * into src= after page got loaded. just look for some
761                                  * attribute that looks like a url.
762                                  */
763                                 for(a = g.attr; a->name; a++){
764                                         if(strcmp(a->name, "longdesc") == 0)
765                                                 continue;
766                                         if(str = linkify(a->value)){
767                                                 nstrcpy(g.state->image, str, sizeof(g.state->image));
768                                                 free(str);
769                                                 break;
770                                         }
771                                 }
772                         }
773                         g.state->ismap=pl_hasattr(g.attr, "ismap");
774                         str=pl_getattr(g.attr, "width");
775                         if(str && *str)
776                                 g.state->width=strtolength(&g, HORIZ, str);
777                         str=pl_getattr(g.attr, "height");
778                         if(str && *str)
779                                 g.state->height=strtolength(&g, VERT, str);
780                         str=pl_getattr(g.attr, "alt");
781                         if(str==0 || *str == 0){
782                                 if(g.state->image[0])
783                                         str=g.state->image;
784                                 else
785                                         str="[[image]]";
786                         }
787                         pl_htmloutput(&g, 0, str, 0);
788                         g.state->image[0]=0;
789                         g.state->ismap=0;
790                         g.state->width=0;
791                         g.state->height=0;
792                         break;
793                 case Tag_plaintext:
794                         g.spacc=0;
795                         plaintext(&g);
796                         break;
797                 case Tag_comment:
798                 case Tag_html:
799                 case Tag_link:
800                 case Tag_nextid:
801                 case Tag_table:
802                         break;
803                 case Tag_tr:
804                         g.spacc=0;
805                         g.linebrk=1;
806                         break;
807                 case Tag_td:
808                         g.spacc++;
809                         break;
810                 case Tag_base:
811                         str=pl_getattr(g.attr, "href");
812                         if(str && *str){
813                                 seturl(g.dst->url, str, g.dst->url->fullname);
814                                 nstrcpy(g.dst->url->fullname, str, sizeof(g.dst->url->fullname));
815                                 /* base should be a full url, but it often isnt so have to resolve */
816                                 urlresolve(g.dst->url);
817                         }
818                         break;
819                 case Tag_a:
820                         str=pl_getattr(g.attr, "name");
821                         if(str && *str)
822                                 nstrcpy(g.state->name, str, sizeof(g.state->name));
823                         pl_htmloutput(&g, 0, "", 0);
824                         str=pl_getattr(g.attr, "href");
825                         if(str && *str)
826                                 nstrcpy(g.state->link, str, sizeof(g.state->link));
827                         break;
828                 case Tag_meta:
829                         if((str=pl_getattr(g.attr, "http-equiv"))==0)
830                                 break;
831                         if(cistrcmp(str, "refresh"))
832                                 break;
833                         if((str=pl_getattr(g.attr, "content"))==0)
834                                 break;
835                         if((str=strchr(str, '='))==0)
836                                 break;
837                         str++;
838                         pl_htmloutput(&g, 0, "[refresh: ", 0);
839                         str=unquot(g.state->link, str, sizeof(g.state->link));
840                         pl_htmloutput(&g, 0, str, 0);
841                         g.state->link[0]=0;
842                         pl_htmloutput(&g, 0, "]", 0);
843                         g.linebrk=1;
844                         g.spacc=0;
845                         break;
846                 case Tag_source:
847                 case Tag_video:
848                 case Tag_audio:
849                 case Tag_embed:
850                 case Tag_frame:
851                 case Tag_iframe:
852                         snprint(buf, sizeof(buf), "[%s: ", tag[g.tag].name);
853                         pl_htmloutput(&g, 0, buf, 0);
854                         str=pl_getattr(g.attr, "src");
855                         if(str && *str)
856                                 nstrcpy(g.state->link, str, sizeof(g.state->link));
857                         str=pl_getattr(g.attr, "name");
858                         if(str && *str)
859                                 nstrcpy(g.state->name, str, sizeof(g.state->name));
860                         else
861                                 str = g.state->link;
862                         pl_htmloutput(&g, 0, str, 0);
863                         g.state->link[0]=0;
864                         g.state->name[0]=0;
865                         pl_htmloutput(&g, 0, "]", 0);
866                         g.linebrk=1;
867                         g.spacc=0;
868                         break;
869                 case Tag_address:
870                         g.spacc=0;
871                         g.linebrk=1;
872                         g.state->font=ROMAN;
873                         g.state->size=NORMAL;
874                         g.state->margin=300;
875                         g.state->indent=50;
876                         break;
877                 case Tag_b:
878                 case Tag_strong:
879                         g.state->font=BOLD;
880                         break;
881                 case Tag_s:
882                 case Tag_strike:
883                         g.state->strike=1;
884                         break;
885                 case Tag_blockquot:
886                         g.spacc=0;
887                         g.linebrk=1;
888                         g.state->margin+=50;
889                         g.state->indent=20;
890                         break;
891                 case Tag_body:
892                         break;
893                 case Tag_head:
894                         g.state->font=ROMAN;
895                         g.state->size=NORMAL;
896                         g.state->margin=0;
897                         g.state->indent=20;
898                         g.spacc=0;
899                         break;
900                 case Tag_div:
901                 case Tag_br:
902                         g.spacc=0;
903                         g.linebrk=1;
904                         break;
905                 case Tag_span:
906                 case Tag_center:
907                         /* more to come */
908                         break;
909                 case Tag_cite:
910                 case Tag_acronym:
911                         g.state->font=ITALIC;
912                         g.state->size=NORMAL;
913                         break;
914                 case Tag_code:
915                         g.state->font=CWIDTH;
916                         g.state->size=NORMAL;
917                         break;
918                 case Tag_dd:
919                         g.linebrk=1;
920                         g.state->indent=0;
921                         g.state->font=ROMAN;
922                         g.spacc=0;
923                         break;
924                 case Tag_dfn:
925                         htmlerror(g.name, g.lineno, "<dfn> deprecated");
926                 case Tag_abbr:
927                         g.state->font=BOLD;
928                         g.state->size=NORMAL;
929                         break;
930                 case Tag_dl:
931                         g.state->font=BOLD;
932                         g.state->size=NORMAL;
933                         g.state->margin+=40;
934                         g.spacc=0;
935                         break;
936                 case Tag_dt:
937                         g.para=1;
938                         g.state->indent=-40;
939                         g.state->font=BOLD;
940                         g.spacc=0;
941                         break;
942                 case Tag_font:
943                         /* more to come */
944                         break;
945                 case Tag_u:
946                         htmlerror(g.name, g.lineno, "<u> deprecated");
947                 case Tag_em:
948                 case Tag_i:
949                 case Tag_var:
950                         g.state->font=ITALIC;
951                         break;
952                 case Tag_h1:
953                         g.linebrk=1;
954                         g.state->font=BOLD;
955                         g.state->size=ENORMOUS;
956                         g.state->margin+=100;
957                         g.spacc=0;
958                         break;
959                 case Tag_h2:
960                         pl_linespace(&g);
961                         g.state->font=BOLD;
962                         g.state->size=ENORMOUS;
963                         g.spacc=0;
964                         break;
965                 case Tag_h3:
966                         g.linebrk=1;
967                         pl_linespace(&g);
968                         g.state->font=ITALIC;
969                         g.state->size=ENORMOUS;
970                         g.state->margin+=20;
971                         g.spacc=0;
972                         break;
973                 case Tag_h4:
974                         pl_linespace(&g);
975                         g.state->font=BOLD;
976                         g.state->size=LARGE;
977                         g.state->margin+=10;
978                         g.spacc=0;
979                         break;
980                 case Tag_h5:
981                         pl_linespace(&g);
982                         g.state->font=ITALIC;
983                         g.state->size=LARGE;
984                         g.state->margin+=10;
985                         g.spacc=0;
986                         break;
987                 case Tag_h6:
988                         pl_linespace(&g);
989                         g.state->font=BOLD;
990                         g.state->size=LARGE;
991                         g.spacc=0;
992                         break;
993                 case Tag_hr:
994                         g.spacc=0;
995                         plrtbitmap(&g.dst->text, 1000000, g.state->margin, hrule, 0, 0);
996                         break;
997                 case Tag_key:
998                         htmlerror(g.name, g.lineno, "<key> deprecated");
999                 case Tag_kbd:
1000                         g.state->font=CWIDTH;
1001                         break;
1002                 case Tag_dir:
1003                 case Tag_menu:
1004                 case Tag_ol:
1005                 case Tag_ul:
1006                         g.state->number=0;
1007                         g.linebrk=1;
1008                         g.state->margin+=25;
1009                         g.state->indent=-25;
1010                         g.spacc=0;
1011                         break;
1012                 case Tag_li:
1013                         g.spacc=0;
1014                         switch(g.state->tag){
1015                         default:
1016                                 htmlerror(g.name, g.lineno, "can't have <li> in <%s>",
1017                                         tag[g.state->tag].name);
1018                         case Tag_dir:   /* supposed to be multi-columns, can't do! */
1019                         case Tag_menu:
1020                                 g.linebrk=1;
1021                                 break;
1022                         case Tag_ol:
1023                                 g.para=1;
1024                                 snprint(buf, sizeof(buf), "%2d  ", ++g.state->number);
1025                                 pl_htmloutput(&g, 0, buf, 0);
1026                                 break;
1027                         case Tag_ul:
1028                                 g.para=0;
1029                                 g.linebrk=0;
1030                                 g.spacc=-1;
1031                                 plrtbitmap(&g.dst->text, 100000,
1032                                         g.state->margin+g.state->indent, bullet, 0, 0);
1033                                 break;
1034                         }
1035                         break;
1036                 case Tag_p:
1037                         pl_linespace(&g);
1038                         g.linebrk=1;
1039                         g.spacc=0;
1040                         break;
1041                 case Tag_listing:
1042                 case Tag_xmp:
1043                         htmlerror(g.name, g.lineno, "<%s> deprecated", tag[g.tag].name);
1044                 case Tag_pre:
1045                 case Tag_samp:
1046                         g.state->indent=0;
1047                         g.state->pre=1;
1048                         g.state->font=CWIDTH;
1049                         g.state->size=NORMAL;
1050                         pl_linespace(&g);
1051                         break;
1052                 case Tag_tt:
1053                         g.state->font=CWIDTH;
1054                         g.state->size=NORMAL;
1055                         break;
1056                 case Tag_title:
1057                         g.text=dst->title+strlen(dst->title);
1058                         g.tp=g.text;
1059                         g.etext=dst->title+NTITLE-1;
1060                         break;
1061                 case Tag_form:
1062                 case Tag_input:
1063                 case Tag_button:
1064                 case Tag_select:
1065                 case Tag_option:
1066                 case Tag_textarea:
1067                 case Tag_isindex:
1068                         rdform(&g);
1069                         break;
1070                 case Tag_script:
1071                 case Tag_style:
1072                         g.state->isscript=1;
1073                         break;
1074                 }
1075                 break;
1076
1077         case ENDTAG:
1078                 /*
1079                  * If the end tag doesn't match the top of the stack, search down the
1080                  * stack for a match: if found, pop through it, otherwise ignore the tag.
1081                  */
1082                 if(g.state->tag!=g.tag){
1083                         tagerr=0;
1084                         for(sp=g.state;sp!=g.stack;--sp){
1085                                 if(sp->tag==g.tag)
1086                                         break;
1087                                 if(tag[g.state->tag].action!=OPTEND) tagerr++;
1088                         }
1089                         if(sp==g.stack){
1090                                 if(tagerr)
1091                                         htmlerror(g.name, g.lineno,
1092                                                 "end tag mismatch <%s>...</%s>, ignored",
1093                                                 tag[g.state->tag].name, tag[g.tag].name);
1094                         }
1095                         else{
1096                                 if(tagerr)
1097                                         htmlerror(g.name, g.lineno,
1098                                                 "end tag mismatch <%s>...</%s>, "
1099                                                 "intervening tags popped",
1100                                                 tag[g.state->tag].name, tag[g.tag].name);
1101                                 g.state=sp-1;
1102                         }
1103                 }
1104                 else if(g.state==g.stack)
1105                         htmlerror(g.name, g.lineno, "end tag </%s> at stack bottom",
1106                                 tag[g.tag].name);
1107                 else
1108                         --g.state;
1109                 switch(g.tag){
1110                 case Tag_select:
1111                 case Tag_form:
1112                 case Tag_textarea:
1113                         endform(&g);
1114                         break;
1115                 case Tag_h1:
1116                 case Tag_h2:
1117                 case Tag_h3:
1118                 case Tag_h4:
1119                         pl_linespace(&g);
1120                         break;
1121                 case Tag_div:
1122                 case Tag_address:
1123                 case Tag_blockquot:
1124                 case Tag_body:
1125                 case Tag_dir:
1126                 case Tag_dl:
1127                 case Tag_dt:
1128                 case Tag_h5:
1129                 case Tag_h6:
1130                 case Tag_listing:
1131                 case Tag_menu:
1132                 case Tag_ol:
1133                 case Tag_samp:
1134                 case Tag_title:
1135                 case Tag_ul:
1136                 case Tag_xmp:
1137                 case Tag_table:
1138                         g.linebrk=1;
1139                         break;
1140                 case Tag_pre:
1141                         pl_linespace(&g);
1142                         break;
1143                 }
1144                 break;
1145         case TEXT:
1146                 if(g.state->isscript)
1147                         continue;
1148                 if(g.state->link[0]==0 && (str = linkify(g.token))){
1149                         nstrcpy(g.state->link, str, sizeof(g.state->link));
1150                         pl_htmloutput(&g, g.nsp, g.token, 0);
1151                         g.state->link[0] = 0;
1152                         free(str);
1153                 } else
1154                         pl_htmloutput(&g, g.nsp, g.token, 0);
1155                 break;
1156         case EOF:
1157                 for(;g.state!=g.stack;--g.state)
1158                         if(tag[g.state->tag].action!=OPTEND)
1159                                 htmlerror(g.name, g.lineno,
1160                                         "missing </%s> at EOF", tag[g.state->tag].name);
1161                 *g.tp='\0';
1162                 getpix(dst->text, dst);
1163                 finish(dst);
1164                 return;
1165         }
1166 }