10 typedef struct Fontdata Fontdata;
16 "dejavusans/unicode.12", 0, 0,
17 "dejavusans/unicode.12", 0, 0,
18 "dejavusans/unicode.14", 0, 0,
19 "dejavusans/unicode.16", 0, 0,
21 "dejavusansit/unicode.12", 0, 0,
22 "dejavusansit/unicode.12", 0, 0,
23 "dejavusansit/unicode.14", 0, 0,
24 "dejavusansit/unicode.16", 0, 0,
26 "dejavusansbd/unicode.12", 0, 0,
27 "dejavusansbd/unicode.12", 0, 0,
28 "dejavusansbd/unicode.14", 0, 0,
29 "dejavusansbd/unicode.16", 0, 0,
31 "terminus/unicode.12", 0, 0,
32 "terminus/unicode.14", 0, 0,
33 "terminus/unicode.16", 0, 0,
34 "terminus/unicode.18", 0, 0,
/*
 * Return the fontlist entry for font index f and size index s; both are
 * asserted to lie in 0..3.  An entry whose font is still 0 is opened from
 * /lib/font/bit/<name>.font, and its space field is set to the width of "0"
 * in the resulting font.
 */
36 Fontdata *pl_whichfont(int f, int s){
39 assert(f >= 0 && f < 4);
40 assert(s >= 0 && s < 4);
42 if(fontlist[f][s].font==0){
43 snprint(name, sizeof(name), "/lib/font/bit/%s.font", fontlist[f][s].name);
44 fontlist[f][s].font=openfont(display, name);
45 if(fontlist[f][s].font==0) fontlist[f][s].font=font;	/* openfont failed: fall back to font */
46 fontlist[f][s].space=stringwidth(fontlist[f][s].font, "0");
48 return &fontlist[f][s];
/*
 * Set up the state-stack entry for tag t.  When g->state has reached
 * &g->stack[NSTACK], a "stack overflow" error naming the tag is reported
 * through htmlerror.
 */
57 void pl_pushstate(Hglob *g, int t){
59 if(g->state==&g->stack[NSTACK]){
60 htmlerror(g->name, g->lineno, "stack overflow at <%s>", tag[t].name);
63 g->state[0]=g->state[-1];	/* the entry starts out as a copy of the one below it */
/* Hand the linespace bitmap to plrtbitmap for g->dst->text. */
66 void pl_linespace(Hglob *g){
67 plrtbitmap(&g->dst->text, 1000000, 0, linespace, 0, 0);
/*
 * Convert the length string str to an int for direction dir.
 * Strings containing "%" and strings containing "em" get their own
 * handling; for "em" the value f is scaled by the size of "M" in the
 * font selected by the current state.
 * NOTE(review): f is presumably the number parsed from str -- confirm.
 */
72 int strtolength(Hglob *g, int dir, char *str){
77 if(cistrstr(str, "%"))
79 if(cistrstr(str, "em")){
80 p=stringsize(pl_whichfont(g->state->font, g->state->size)->font, "M");
81 return floor(f*((dir==HORIZ) ? p.x : p.y));	/* width of "M" for HORIZ, height otherwise */
86 void pl_htmloutput(Hglob *g, int nsp, char *s, Field *field){
88 int space, indent, flags;
90 if(g->state->tag==Tag_title
91 /* || g->state->tag==Tag_textarea */
92 || g->state->tag==Tag_select){
94 if(g->tp!=g->text && g->tp!=g->etext && g->tp[-1]!=' ')
96 while(g->tp!=g->etext && *s) *g->tp++=*s++;
97 if(g->state->tag==Tag_title) g->dst->changed=1;
102 f=pl_whichfont(g->state->font, g->state->size);
104 indent=g->state->margin;
107 indent+=g->state->indent;
113 if(g->state->image[0]==0 && g->state->link[0]==0 && g->state->name[0]==0 && field==0)
116 ap=emalloc(sizeof(Action));
117 if(g->state->image[0])
118 ap->image = strdup(g->state->image);
119 if(g->state->link[0])
120 ap->link = strdup(g->state->link);
121 if(g->state->name[0])
122 ap->name = strdup(g->state->name);
123 ap->ismap=g->state->ismap;
124 ap->width=g->state->width;
125 ap->height=g->state->height;
129 if(indent<0) indent=0;
130 if(g->state->pre && s[0]=='\t'){
143 if(g->state->link[0])
147 plrtstr(&g->dst->text, space, indent, f->font, strdup(s), flags, ap);
154 * Buffered read, no translation
157 int pl_bread(Hglob *g){
160 if(g->hbufp==g->ehbuf){
161 n=read(g->hfd, g->hbuf, NHBUF);
164 snprint(err, sizeof(err), "%r reading %s", g->name);
165 pl_htmloutput(g, 1, err, 0);
174 if(c=='\n') g->lineno++;
178 * Read a character, translating \r\n, \n\r, \r and \n into \n
181 int pl_readc(Hglob *g){
183 char crune[UTFmax+1];
208 for (n=1; n<=sizeof(crune); n++){
209 if(fullrune(crune, n)){
210 chartorune(&r, crune);
/*
 * Store character c in g->peekc so that it can be handed out again.
 * When NPEEKC characters are already pending, an error is reported and c
 * is not stored; EOF is never stored.
 */
220 void pl_putback(Hglob *g, int c){
221 if(g->npeekc==NPEEKC) htmlerror(g->name, g->lineno, "too much putback!");
222 else if(c!=EOF) g->peekc[g->npeekc++]=c;
224 int pl_nextc(Hglob *g){
227 if(g->heof) return EOF;
228 if(g->npeekc!=0) return g->peekc[--g->npeekc];
236 if('a'<=c && c<='z' || 'A'<=c && c<='Z') return STAG;
240 if(c=='!' || 'a'<=c && c<='z' || 'A'<=c && c<='Z' || c=='?') return STAG;
243 if(c=='>') return ETAG;
247 char *unquot(char *dst, char *src, int len){
251 while(*src && strchr(" \t\r\n", *src))
253 if(*src=='\'' || *src=='"'){
254 e=strrchr(src+1, *src);
257 if(e==0) e=strchr(src, 0);
261 if(len>0) memmove(dst, src, len);
/* Return non-zero when c is an ASCII letter (either case) or an ASCII digit. */
265 int alnumchar(int c){
266 return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9';
269 return c=='#' || alnumchar(c);
/*
 * Return url if text token looks like a hyperlink.
 * The visible tests are: a case-insensitive "http://" or "https://" prefix,
 * or a "www." prefix, for which smprint builds the result as "http://"
 * followed by s.
 */
273 char *linkify(char *s){
274 if(s == 0 || s[0] == 0)	/* ||, not &&: reject nil before s[0] is read, and reject the empty token too */
276 if(!cistrncmp(s, "http://", 7))
278 if(!cistrncmp(s, "https://", 8))
280 if(!cistrncmp(s, "www.", 4)){
289 } else if(!alnumchar(s[i]))
293 return smprint("http://%s", s);
299 * remove entity references, in place.
301 * This doesn't work if removing an entity reference can lengthen the string!
302 * Fortunately, this doesn't happen.
304 void pl_rmentities(Hglob *g, char *s){
312 && ((*s=='#' && strchr("0123456789Xx", s[1]))
313 || 'a'<=*s && *s<='z'
314 || 'A'<=*s && *s<='Z')){
316 while(entchar(*s)) s++;
320 if(strcmp(u, "lt") == 0)
322 else if(strcmp(u, "gt") == 0)
324 else if(strcmp(u, "quot") == 0)
326 else if(strcmp(u, "apos") == 0)
328 else if(strcmp(u, "amp") == 0)
342 * Skip over white space
344 char *pl_white(char *s){
345 while(*s==' ' || *s=='\t' || *s=='\n' || *s=='\r') s++;	/* advance past blanks, tabs, newlines and carriage returns */
349 * Skip over HTML word
351 char *pl_word(char *s){
352 if ('a'<=*s && *s<='z' || 'A'<=*s && *s<='Z') {	/* a word has to begin with an ASCII letter */
	/* after that, letters, digits, '-', '.' and ':' all belong to the word */
354 while('a'<=*s && *s<='z' || 'A'<=*s && *s<='Z' || '0'<=*s && *s<='9' ||
355 *s=='-' || *s=='.' || *s==':') s++;
360 * Skip to matching quote
362 char *pl_quote(char *s){
365 while(*s!=q && *s!='\0') s++;	/* stop at the character q or at the end of the string, whichever comes first */
/* Squeeze carriage returns and newlines out of s, working in place. */
368 void pl_dnl(char *s){
370 for(t=s;*s;s++) if(*s!='\r' && *s!='\n') *t++=*s;	/* t is the write position, s the read position */
373 void pl_tagparse(Hglob *g, char *str){
374 char *s, *t, *name, c;
379 if(str[0]=='!'){ /* test should be strncmp(str, "!--", 3)==0 */
384 if(str[0]=='/') str++;
387 if(*s!='/' && *s!=' ' && *s!='\n' && *s!='\t' && *s!='\0'){
388 htmlerror(g->name, g->lineno, "bad tag name in %s", str);
392 if(*s!='\0') *s++='\0';
393 for(t=name;t!=s;t++) if('A'<=*t && *t<='Z') *t+='a'-'A';
395 * Binary search would be faster here
397 for(tagp=tag;tagp->name;tagp++) if(strcmp(name, tagp->name)==0) break;
399 if(g->tag==Tag_end) htmlerror(g->name, g->lineno, "no tag %s", name);
411 for(s=ap->name;*s;s++) if('A'<=*s && *s<='Z') *s+='a'-'A';
414 if(*s=='\'' || *s=='"'){
418 htmlerror(g->name, g->lineno,
419 "No terminating quote in rhs of attribute %s",
428 /* read up to white space or > */
430 while(*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0') s++;
431 if(*s!='\0') *s++='\0';
433 pl_rmentities(g, ap->value);
439 if(ap==&g->attr[NATTR-1])
440 htmlerror(g->name, g->lineno, "too many attributes!");
444 int pl_getcomment(Hglob *g){
446 if((c=pl_nextc(g))=='-' && (c=pl_nextc(g))=='-'){
447 /* <!-- eats everything until --> or EOF */
449 while((c=pl_nextc(g))!='-' && c!=EOF)
453 if((c=pl_nextc(g))=='-'){
454 while((c=pl_nextc(g))=='-')
456 if(c==ETAG || c==EOF)
461 /* <! eats everything until > or EOF */
462 while(c!=ETAG && c!=EOF)
466 htmlerror(g->name, g->lineno, "EOF in comment");
/*
 * Encode into p with runetochar and return runetochar's result.
 * NOTE(review): r is presumably set from v in a line not shown here -- confirm.
 */
473 int lrunetochar(char *p, int v)
478 return runetochar(p, &r);
481 int pl_getscript(Hglob *g){
486 while((c=pl_nextc(g)) != EOF){
487 if(c==STAG || c==' ' || c=='\t' || c=='\n'){
492 tokp += lrunetochar(tokp, c);
493 if(c==0 || c=='>' || tokp >= &g->token[NTOKEN-UTFmax-1])
497 t = tag[g->state->tag].name;
498 if(g->token[1] == '/' && cistrncmp(g->token+2, t, strlen(t)) == 0){
499 g->tag=g->state->tag;
503 pl_rmentities(g, g->token);
510 * Read a start or end tag -- the caller has read the initial <
512 int pl_gettag(Hglob *g){
515 if(g->state->isscript)
516 return pl_getscript(g);
517 if((c=pl_nextc(g))=='!' || c=='?')
518 return pl_getcomment(g);
522 while((c=pl_nextc(g))!=EOF){
523 if(c == '=' && q == 0)
525 else if(c == '\'' || c == '"'){
531 else if(c == ETAG && q != '\'' && q != '"')
533 else if(q == '=' && c != ' ' && c != '\t' && c != '\n')
535 if(tokp < &g->token[NTOKEN-UTFmax-1])
536 tokp += lrunetochar(tokp, c);
539 if(c==EOF) htmlerror(g->name, g->lineno, "EOF in tag");
540 pl_tagparse(g, g->token);
541 if(g->token[0]!='/') return TAG;
542 if(g->attr[0].name!=0)
543 htmlerror(g->name, g->lineno, "end tag should not have attributes");
547 * The next token is a tag, an end tag or a sequence of non-white
548 * characters. If inside <pre>, single newlines are converted to <br>,
549 * double newlines are converted to <p> and spaces are preserved.
550 * Otherwise, spaces and newlines are noted and discarded.
552 int pl_gettoken(Hglob *g){
555 if(g->state->pre) switch(c=pl_nextc(g)){
556 case STAG: return pl_gettag(g);
557 case EOF: return EOF;
559 switch(c=pl_nextc(g)){
564 pl_tagparse(g, "br");
571 if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
574 while(c!='\t' && c!='\n' && c!=STAG && c!=EOF){
576 if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
580 pl_rmentities(g, g->token);
586 while((c=pl_nextc(g))==' ' || c=='\t' || c=='\n')
590 case STAG: return pl_gettag(g);
591 case EOF: return EOF;
596 if(tokp < &g->token[NTOKEN-UTFmax-1]) tokp += lrunetochar(tokp, c);
598 }while(c!=' ' && c!='\t' && c!='\n' && c!=STAG && c!=EOF);
600 pl_rmentities(g, g->token);
/*
 * Search the attribute array attr, which ends at the first entry whose
 * name is 0, for an entry whose name compares equal to name under strcmp.
 */
607 char *pl_getattr(Pair *attr, char *name){
608 for(;attr->name;attr++)
609 if(strcmp(attr->name, name)==0)
/*
 * Test whether the attribute array attr, which ends at the first entry
 * whose name is 0, has an entry whose name compares equal to name under
 * strcmp.
 */
613 int pl_hasattr(Pair *attr, char *name){
614 for(;attr->name;attr++)
615 if(strcmp(attr->name, name)==0)
619 void plaintext(Hglob *g){
623 g->state->font=CWIDTH;
624 g->state->size=NORMAL;
625 elp=&line[NLINE-UTFmax-1];
630 if(c=='\n' || lp>=elp){
633 pl_htmloutput(g, 0, line, 0);
637 do *lp++=' '; while(lp<elp && utfnlen(line, lp-line)%8!=0);
640 lp += lrunetochar(lp, c);
645 pl_htmloutput(g, 0, line, 0);
648 void plrdplain(char *name, int fd, Www *dst){
651 g.state->tag=Tag_html;
652 g.state->font=CWIDTH;
653 g.state->size=NORMAL;
664 g.ehbuf=g.hbufp=g.hbuf;
672 g.etext=g.text+NTITLE-1;
675 nstrcpy(g.text, name, NTITLE);
679 void plrdhtml(char *name, int fd, Www *dst){
687 g.state->tag=Tag_html;
689 g.state->size=NORMAL;
704 g.ehbuf=g.hbufp=g.hbuf;
712 g.etext=g.text+NTITLE-1;
717 for(;;) switch(pl_gettoken(&g)){
719 switch(tag[g.tag].action){
721 for(sp=g.state;sp!=g.stack && sp->tag!=g.tag;--sp);
723 pl_pushstate(&g, g.tag);
725 for(;g.state!=sp;--g.state)
726 if(tag[g.state->tag].action!=OPTEND)
727 htmlerror(g.name, g.lineno,
728 "end tag </%s> missing",
729 tag[g.state->tag].name);
732 pl_pushstate(&g, g.tag);
735 str=pl_getattr(g.attr, "id");
739 nstrcpy(swap, g.state->name, sizeof(swap));
740 nstrcpy(g.state->name, str, sizeof(g.state->name));
741 pl_htmloutput(&g, 0, "", 0);
742 nstrcpy(g.state->name, swap, sizeof(g.state->name));
746 htmlerror(g.name, g.lineno,
747 "unimplemented tag <%s>", tag[g.tag].name);
749 case Tag_end: /* unrecognized start tag */
752 str=pl_getattr(g.attr, "src");
754 nstrcpy(g.state->image, str, sizeof(g.state->image));
759 * Hack to emulate JavaScript that rewrites some attribute
760 * into src= after the page has loaded: just look for any
761 * attribute that looks like a URL.
763 for(a = g.attr; a->name; a++){
764 if(strcmp(a->name, "longdesc") == 0)
766 if(str = linkify(a->value)){
767 nstrcpy(g.state->image, str, sizeof(g.state->image));
773 g.state->ismap=pl_hasattr(g.attr, "ismap");
774 str=pl_getattr(g.attr, "width");
776 g.state->width=strtolength(&g, HORIZ, str);
777 str=pl_getattr(g.attr, "height");
779 g.state->height=strtolength(&g, VERT, str);
780 str=pl_getattr(g.attr, "alt");
781 if(str==0 || *str == 0){
782 if(g.state->image[0])
787 pl_htmloutput(&g, 0, str, 0);
811 str=pl_getattr(g.attr, "href");
813 seturl(g.dst->url, str, g.dst->url->fullname);
814 nstrcpy(g.dst->url->fullname, str, sizeof(g.dst->url->fullname));
815 /* base should be a full URL, but it often isn't, so we have to resolve it */
816 urlresolve(g.dst->url);
820 str=pl_getattr(g.attr, "name");
822 nstrcpy(g.state->name, str, sizeof(g.state->name));
823 pl_htmloutput(&g, 0, "", 0);
824 str=pl_getattr(g.attr, "href");
826 nstrcpy(g.state->link, str, sizeof(g.state->link));
829 if((str=pl_getattr(g.attr, "http-equiv"))==0)
831 if(cistrcmp(str, "refresh"))
833 if((str=pl_getattr(g.attr, "content"))==0)
835 if((str=strchr(str, '='))==0)
838 pl_htmloutput(&g, 0, "[refresh: ", 0);
839 str=unquot(g.state->link, str, sizeof(g.state->link));
840 pl_htmloutput(&g, 0, str, 0);
842 pl_htmloutput(&g, 0, "]", 0);
852 snprint(buf, sizeof(buf), "[%s: ", tag[g.tag].name);
853 pl_htmloutput(&g, 0, buf, 0);
854 str=pl_getattr(g.attr, "src");
856 nstrcpy(g.state->link, str, sizeof(g.state->link));
857 str=pl_getattr(g.attr, "name");
859 nstrcpy(g.state->name, str, sizeof(g.state->name));
862 pl_htmloutput(&g, 0, str, 0);
865 pl_htmloutput(&g, 0, "]", 0);
873 g.state->size=NORMAL;
895 g.state->size=NORMAL;
911 g.state->font=ITALIC;
912 g.state->size=NORMAL;
915 g.state->font=CWIDTH;
916 g.state->size=NORMAL;
925 htmlerror(g.name, g.lineno, "<dfn> deprecated");
928 g.state->size=NORMAL;
932 g.state->size=NORMAL;
946 htmlerror(g.name, g.lineno, "<u> deprecated");
950 g.state->font=ITALIC;
955 g.state->size=ENORMOUS;
956 g.state->margin+=100;
962 g.state->size=ENORMOUS;
968 g.state->font=ITALIC;
969 g.state->size=ENORMOUS;
982 g.state->font=ITALIC;
995 plrtbitmap(&g.dst->text, 1000000, g.state->margin, hrule, 0, 0);
998 htmlerror(g.name, g.lineno, "<key> deprecated");
1000 g.state->font=CWIDTH;
1008 g.state->margin+=25;
1009 g.state->indent=-25;
1014 switch(g.state->tag){
1016 htmlerror(g.name, g.lineno, "can't have <li> in <%s>",
1017 tag[g.state->tag].name);
1018 case Tag_dir: /* supposed to be multi-columns, can't do! */
1024 snprint(buf, sizeof(buf), "%2d ", ++g.state->number);
1025 pl_htmloutput(&g, 0, buf, 0);
1031 plrtbitmap(&g.dst->text, 100000,
1032 g.state->margin+g.state->indent, bullet, 0, 0);
1043 htmlerror(g.name, g.lineno, "<%s> deprecated", tag[g.tag].name);
1048 g.state->font=CWIDTH;
1049 g.state->size=NORMAL;
1053 g.state->font=CWIDTH;
1054 g.state->size=NORMAL;
1057 g.text=dst->title+strlen(dst->title);
1059 g.etext=dst->title+NTITLE-1;
1072 g.state->isscript=1;
1079 * If the end tag doesn't match the top, we try to uncover a match
1082 if(g.state->tag!=g.tag){
1084 for(sp=g.state;sp!=g.stack;--sp){
1087 if(tag[g.state->tag].action!=OPTEND) tagerr++;
1091 htmlerror(g.name, g.lineno,
1092 "end tag mismatch <%s>...</%s>, ignored",
1093 tag[g.state->tag].name, tag[g.tag].name);
1097 htmlerror(g.name, g.lineno,
1098 "end tag mismatch <%s>...</%s>, "
1099 "intervening tags popped",
1100 tag[g.state->tag].name, tag[g.tag].name);
1104 else if(g.state==g.stack)
1105 htmlerror(g.name, g.lineno, "end tag </%s> at stack bottom",
1146 if(g.state->isscript)
1148 if(g.state->link[0]==0 && (str = linkify(g.token))){
1149 nstrcpy(g.state->link, str, sizeof(g.state->link));
1150 pl_htmloutput(&g, g.nsp, g.token, 0);
1151 g.state->link[0] = 0;
1154 pl_htmloutput(&g, g.nsp, g.token, 0);
1157 for(;g.state!=g.stack;--g.state)
1158 if(tag[g.state->tag].action!=OPTEND)
1159 htmlerror(g.name, g.lineno,
1160 "missing </%s> at EOF", tag[g.state->tag].name);
1162 getpix(dst->text, dst);