// sys/src/libhtml/build.c -- plan9front.git (viewed via git.lizzy.rs)
// blob 7855d8bdeb4961ce1813e4e4f4716b6a50efb829
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <ctype.h>
5 #include <html.h>
6 #include "impl.h"
7
// A fixed-capacity stack for holding integer values.
enum {
	Nestmax = 40	// max nesting level of lists, font styles, etc.
};

// Entries occupy slots[0..n-1]; the top of the stack is slots[n-1].
struct Stack {
	int	n;			// next available slot
	int	slots[Nestmax];	// stack entries
};
17
18 // Parsing state
19 struct Pstate
20 {
21         Pstate* next;                   // in stack of Pstates
22         int             skipping;               // true when we shouldn't add items
23         int             skipwhite;              // true when we should strip leading space
24         int             curfont;                // font index for current font
25         int             curfg;          // current foreground color
26         Background      curbg;  // current background
27         int             curvoff;                // current baseline offset
28         uchar   curul;          // current underline/strike state
29         uchar   curjust;                // current justify state
30         int             curanchor;      // current (href) anchor id (if in one), or 0
31         int             curstate;               // current value of item state
32         int             literal;                // current literal state
33         int             inpar;          // true when in a paragraph-like construct
34         int             adjsize;                // current font size adjustment
35         Item*   items;          // dummy head of item list we're building
36         Item*   lastit;         // tail of item list we're building
37         Item*   prelastit;              // item before lastit
38         Stack   fntstylestk;    // style stack
39         Stack   fntsizestk;             // size stack
40         Stack   fgstk;          // text color stack
41         Stack   ulstk;          // underline stack
42         Stack   voffstk;                // vertical offset stack
43         Stack   listtypestk;    // list type stack
44         Stack   listcntstk;             // list counter stack
45         Stack   juststk;                // justification stack
46         Stack   hangstk;                // hanging stack
47 };
48
49 struct ItemSource
50 {
51         Docinfo*                doc;
52         Pstate*         psstk;
53         int                     nforms;
54         int                     ntables;
55         int                     nanchors;
56         int                     nframes;
57         Form*           curform;
58         Map*            curmap;
59         Table*          tabstk;
60         Kidinfo*                kidstk;
61 };
62
// Some layout parameters
enum {
	FRKIDMARGIN = 6,	// default margin around kid frames
	IMGHSPACE = 0,		// default hspace for images (0 matches IE, Netscape)
	IMGVSPACE = 0,		// default vspace for images
	FLTIMGHSPACE = 2,	// default hspace for float images
	TABSP = 5,		// default cellspacing for tables
	TABPAD = 1,		// default cell padding for tables
	LISTTAB = 1,		// number of tabs to indent lists
	BQTAB = 1,		// number of tabs to indent blockquotes
	HRSZ = 2,		// thickness of horizontal rules
	SUBOFF = 4,		// vertical offset for subscripts
	SUPOFF = 6,		// vertical offset for superscripts
	NBSP = 160		// non-breaking space character
};
78
79 // These tables must be sorted
80 static StringInt align_tab[] = {
81         {L"baseline",   ALbaseline},
82         {L"bottom",     ALbottom},
83         {L"center",     ALcenter},
84         {L"char",               ALchar},
85         {L"justify",    ALjustify},
86         {L"left",               ALleft},
87         {L"middle",     ALmiddle},
88         {L"right",              ALright},
89         {L"top",                ALtop}
90 };
91 #define NALIGNTAB (sizeof(align_tab)/sizeof(StringInt))
92
93 static StringInt input_tab[] = {
94         {L"button",     Fbutton},
95         {L"checkbox",   Fcheckbox},
96         {L"file",               Ffile},
97         {L"hidden",     Fhidden},
98         {L"image",      Fimage},
99         {L"password",   Fpassword},
100         {L"radio",              Fradio},
101         {L"reset",              Freset},
102         {L"submit",     Fsubmit},
103         {L"text",               Ftext}
104 };
105 #define NINPUTTAB (sizeof(input_tab)/sizeof(StringInt))
106
107 static StringInt clear_tab[] = {
108         {L"all",        IFcleft|IFcright},
109         {L"left",       IFcleft},
110         {L"right",      IFcright}
111 };
112 #define NCLEARTAB (sizeof(clear_tab)/sizeof(StringInt))
113
114 static StringInt fscroll_tab[] = {
115         {L"auto",       FRhscrollauto|FRvscrollauto},
116         {L"no", FRnoscroll},
117         {L"yes",        FRhscroll|FRvscroll},
118 };
119 #define NFSCROLLTAB (sizeof(fscroll_tab)/sizeof(StringInt))
120
121 static StringInt shape_tab[] = {
122         {L"circ",               SHcircle},
123         {L"circle",             SHcircle},
124         {L"poly",               SHpoly},
125         {L"polygon",    SHpoly},
126         {L"rect",               SHrect},
127         {L"rectangle",  SHrect}
128 };
129 #define NSHAPETAB (sizeof(shape_tab)/sizeof(StringInt))
130
131 static StringInt method_tab[] = {
132         {L"get",                HGet},
133         {L"post",               HPost}
134 };
135 #define NMETHODTAB (sizeof(method_tab)/sizeof(StringInt))
136
137 static Rune* roman[15]= {
138         L"I", L"II", L"III", L"IV", L"V", L"VI", L"VII", L"VIII", L"IX", L"X",
139         L"XI", L"XII", L"XIII", L"XIV", L"XV"
140 };
141 #define NROMAN 15
142
// List number types.
// The names presumably mirror the list TYPE attribute values
// (disc, square, circle, 1, a, A, i, I) -- confirm against listtyval.
enum {
	LTdisc,
	LTsquare,
	LTcircle,
	LT1,
	LTa,
	LTA,
	LTi,
	LTI
};
147
// Flag bits stored in blockbrk[] entries.
enum {
	SPBefore = 2,	// want a line of space before the element
	SPAfter = 4,	// want a line of space after the element
	BL = 1,		// element gets a break (makes the entry nonzero)
	BLBA = (BL|SPBefore|SPAfter)	// break, with space both before and after
};
154
155 // blockbrk[tag] is break info for a block level element, or one
156 // of a few others that get the same treatment re ending open paragraphs
157 // and requiring a line break / vertical space before them.
158 // If we want a line of space before the given element, SPBefore is OR'd in.
159 // If we want a line of space after the given element, SPAfter is OR'd in.
160
161 static uchar blockbrk[Numtags]= {
162         [Taddress] BLBA, [Tblockquote] BLBA, [Tcenter] BL,
163         [Tdir] BLBA, [Tdiv] BL, [Tdd] BL, [Tdl] BLBA,
164         [Tdt] BL, [Tform] BLBA,
165         // headings and tables get breaks added manually
166         [Th1] BL, [Th2] BL, [Th3] BL,
167         [Th4] BL, [Th5] BL, [Th6] BL,
168         [Thr] BL, [Tisindex] BLBA, [Tli] BL, [Tmenu] BLBA,
169         [Tol] BLBA, [Tp] BLBA, [Tpre] BLBA,
170         [Tul] BLBA
171 };
172
// Flag value stored in attrinfo[] entries.
enum {
	AGEN = 1	// attribute is generic
};
176
177 // attrinfo is information about attributes.
178 // The AGEN value means that the attribute is generic (applies to almost all elements)
179 static uchar attrinfo[Numattrs]= {
180         [Aid] AGEN, [Aclass] AGEN, [Astyle] AGEN, [Atitle] AGEN,
181         [Aonblur] AGEN, [Aonchange] AGEN, [Aonclick] AGEN,
182         [Aondblclick] AGEN, [Aonfocus] AGEN, [Aonkeypress] AGEN,
183         [Aonkeyup] AGEN, [Aonload] AGEN, [Aonmousedown] AGEN,
184         [Aonmousemove] AGEN, [Aonmouseout] AGEN, [Aonmouseover] AGEN,
185         [Aonmouseup] AGEN, [Aonreset] AGEN, [Aonselect] AGEN,
186         [Aonsubmit] AGEN, [Aonunload] AGEN
187 };
188
189 static uchar scriptev[Numattrs]= {
190         [Aonblur] SEonblur, [Aonchange] SEonchange, [Aonclick] SEonclick,
191         [Aondblclick] SEondblclick, [Aonfocus] SEonfocus, [Aonkeypress] SEonkeypress,
192         [Aonkeyup] SEonkeyup, [Aonload] SEonload, [Aonmousedown] SEonmousedown,
193         [Aonmousemove] SEonmousemove, [Aonmouseout] SEonmouseout, [Aonmouseover] SEonmouseover,
194         [Aonmouseup] SEonmouseup, [Aonreset] SEonreset, [Aonselect] SEonselect,
195         [Aonsubmit] SEonsubmit, [Aonunload] SEonunload
196 };
197
198 // Color lookup table
199 static StringInt color_tab[] = {
200         {L"aqua", 0x00FFFF},
201         {L"black",  0x000000},
202         {L"blue", 0x0000CC},
203         {L"fuchsia", 0xFF00FF},
204         {L"gray", 0x808080},
205         {L"green", 0x008000},
206         {L"lime", 0x00FF00},
207         {L"maroon", 0x800000},
208         {L"navy", 0x000080,},
209         {L"olive", 0x808000},
210         {L"purple", 0x800080},
211         {L"red", 0xFF0000},
212         {L"silver", 0xC0C0C0},
213         {L"teal", 0x008080},
214         {L"white", 0xFFFFFF},
215         {L"yellow", 0xFFFF00}
216 };
217 #define NCOLORS (sizeof(color_tab)/sizeof(StringInt))
218
219 static StringInt                *targetmap;
220 static int                      targetmapsize;
221 static int                      ntargets;
222
223 static int buildinited = 0;
224
// Buffer sizes (their uses are outside the portion of the file reviewed here).
#define SMALLBUFSIZE 240
#define BIGBUFSIZE 2000

// Debug level: getitems prints each token to fd 2 when dbgbuild > 1.
int	dbgbuild = 0;
// When nonzero, warnings about questionable HTML are printed to fd 2.
int	warn = 0;
230
231 static Align            aalign(Token* tok);
232 static int                      acolorval(Token* tok, int attid, int dflt);
233 static void                     addbrk(Pstate* ps, int sp, int clr);
234 static void                     additem(Pstate* ps, Item* it, Token* tok);
235 static void                     addlinebrk(Pstate* ps, int clr);
236 static void                     addnbsp(Pstate* ps);
237 static void                     addtext(Pstate* ps, Rune* s);
238 static Dimen            adimen(Token* tok, int attid);
239 static int                      aflagval(Token* tok, int attid);
240 static int                      aintval(Token* tok, int attid, int dflt);
241 static Rune*            astrval(Token* tok, int attid, Rune* dflt);
242 static int                      atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt);
243 static int                      atargval(Token* tok, int dflt);
244 static int                      auintval(Token* tok, int attid, int dflt);
245 static Rune*            aurlval(Token* tok, int attid, Rune* dflt, Rune* base);
246 static Rune*            aval(Token* tok, int attid);
247 static void                     buildinit(void);
248 static Pstate*          cell_pstate(Pstate* oldps, int ishead);
249 static void                     changehang(Pstate* ps, int delta);
250 static void                     changeindent(Pstate* ps, int delta);
251 static int                      color(Rune* s, int dflt);
252 static void                     copystack(Stack* tostk, Stack* fromstk);
253 static int                      dimprint(char* buf, int nbuf, Dimen d);
254 static Pstate*          finishcell(Table* curtab, Pstate* psstk);
255 static void                     finish_table(Table* t);
256 static void                     freeanchor(Anchor* a);
257 static void                     freedestanchor(DestAnchor* da);
258 static void                     freeform(Form* f);
259 static void                     freeformfield(Formfield* ff);
260 static void                     freeitem(Item* it);
261 static void                     freepstate(Pstate* p);
262 static void                     freepstatestack(Pstate* pshead);
263 static void                     freescriptevents(SEvent* ehead);
264 static void                     freetable(Table* t);
265 static Map*             getmap(Docinfo* di, Rune* name);
266 static Rune*            getpcdata(Token* toks, int tokslen, int* ptoki);
267 static Pstate*          lastps(Pstate* psl);
268 static Rune*            listmark(uchar ty, int n);
269 static int                      listtyval(Token* tok, int dflt);
270 static Align            makealign(int halign, int valign);
271 static Background       makebackground(Rune* imgurl, int color);
272 static Dimen            makedimen(int kind, int spec);
273 static Anchor*          newanchor(int index, Rune* name, Rune* href, int target, Anchor* link);
274 static Area*            newarea(int shape, Rune* href, int target, Area* link);
275 static DestAnchor*      newdestanchor(int index, Rune* name, Item* item, DestAnchor* link);
276 static Docinfo*         newdocinfo(void);
277 static Genattr*         newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, Attr* events);
278 static Form*            newform(int formid, Rune* name, Rune* action,
279                                         int target, int method, Form* link);
280 static Formfield*       newformfield(int ftype, int fieldid, Form* form, Rune* name,
281                                         Rune* value, int size, int maxlength, Formfield* link);
282 static Item*            newifloat(Item* it, int side);
283 static Item*            newiformfield(Formfield* ff);
284 static Item*            newiimage(Rune* src, Rune* altrep, int align, int width, int height,
285                                         int hspace, int vspace, int border, int ismap, Map* map);
286 static Item*            newirule(int align, int size, int noshade, int color, Dimen wspec);
287 static Item*            newispacer(int spkind);
288 static Item*            newitable(Table* t);
289 static ItemSource*      newitemsource(Docinfo* di);
290 static Item*            newitext(Rune* s, int fnt, int fg, int voff, int ul);
291 static Kidinfo*         newkidinfo(int isframeset, Kidinfo* link);
292 static Option*          newoption(int selected, Rune* value, Rune* display, Option* link);
293 static Pstate*          newpstate(Pstate* link);
294 static SEvent*          newscriptevent(int type, Rune* script, SEvent* link);
295 static Table*           newtable(int tableid, Align align, Dimen width, int border,
296                                         int cellspacing, int cellpadding, Background bg, Token* tok, Table* link);
297 static Tablecell*       newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec,
298                                         int hspec, Background bg, int flags, Tablecell* link);
299 static Tablerow*        newtablerow(Align align, Background bg, int flags, Tablerow* link);
300 static Dimen            parsedim(Rune* s, int ns);
301 static void                     pop(Stack* stk);
302 static void                     popfontsize(Pstate* ps);
303 static void                     popfontstyle(Pstate* ps);
304 static void                     popjust(Pstate* ps);
305 static int                      popretnewtop(Stack* stk, int dflt);
306 static int                      push(Stack* stk, int val);
307 static void                     pushfontsize(Pstate* ps, int sz);
308 static void                     pushfontstyle(Pstate* ps, int sty);
309 static void                     pushjust(Pstate* ps, int j);
310 static Item*            textit(Pstate* ps, Rune* s);
311 static Rune*            removeallwhite(Rune* s);
312 static void                     resetdocinfo(Docinfo* d);
313 static void                     setcurfont(Pstate* ps);
314 static void                     setcurjust(Pstate* ps);
315 static void                     setdimarray(Token* tok, int attid, Dimen** pans, int* panslen);
316 static Rune*            stringalign(int a);
317 static void                     targetmapinit(void);
318 static int                      toint(Rune* s);
319 static int                      top(Stack* stk, int dflt);
320 static void                     trim_cell(Tablecell* c);
321 static int                      validalign(Align a);
322 static int                      validdimen(Dimen d);
323 static int                      validformfield(Formfield* f);
324 static int                      validhalign(int a);
325 static int                      validptr(void* p);
326 static int                      validStr(Rune* s);
327 static int                      validtable(Table* t);
328 static int                      validtablerow(Tablerow* r);
329 static int                      validtablecol(Tablecol* c);
330 static int                      validtablecell(Tablecell* c);
331 static int                      validvalign(int a);
332 static int                      Iconv(Fmt *f);
333
334 static void
335 buildinit(void)
336 {
337         fmtinstall('I', Iconv);
338         targetmapinit();
339         buildinited = 1;
340 }
341
342 static ItemSource*
343 newitemsource(Docinfo* di)
344 {
345         ItemSource*     is;
346         Pstate* ps;
347
348         ps = newpstate(nil);
349         if(di->mediatype != TextHtml) {
350                 ps->curstate &= ~IFwrap;
351                 ps->literal = 1;
352                 pushfontstyle(ps, FntT);
353         }
354         is = (ItemSource*)emalloc(sizeof(ItemSource));
355         is->doc = di;
356         is->psstk = ps;
357         is->nforms = 0;
358         is->ntables = 0;
359         is->nanchors = 0;
360         is->nframes = 0;
361         is->curform = nil;
362         is->curmap = nil;
363         is->tabstk = nil;
364         is->kidstk = nil;
365         return is;
366 }
367
368 static Item *getitems(ItemSource* is, uchar* data, int datalen);
369
370 // Parse an html document and create a list of layout items.
371 // Allocate and return document info in *pdi.
372 // When caller is done with the items, it should call
373 // freeitems on the returned result, and then
374 // freedocinfo(*pdi).
375 Item*
376 parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi)
377 {
378         Item *it;
379         Docinfo*        di;
380         ItemSource*     is;
381
382         di = newdocinfo();
383         di->src = _Strdup(pagesrc);
384         di->base = _Strdup(pagesrc);
385         di->mediatype = mtype;
386         di->chset = chset;
387         *pdi = di;
388         is = newitemsource(di);
389         it = getitems(is, data, datalen);
390         freepstatestack(is->psstk);
391         free(is);
392         return it;
393 }
394
395 // Get a group of tokens for lexer, parse them, and create
396 // a list of layout items.
397 // When caller is done with the items, it should call
398 // freeitems on the returned result.
399 static Item*
400 getitems(ItemSource* is, uchar* data, int datalen)
401 {
402         int     i;
403         int     j;
404         int     nt;
405         int     pt;
406         int     doscripts;
407         int     tokslen;
408         int     toki;
409         int     h;
410         int     sz;
411         int     method;
412         int     n;
413         int     nblank;
414         int     norsz;
415         int     bramt;
416         int     sty;
417         int     nosh;
418         int     color;
419         int     oldcuranchor;
420         int     dfltbd;
421         int     v;
422         int     hang;
423         int     isempty;
424         int     tag;
425         int     brksp;
426         int     target;
427         uchar   brk;
428         uchar   flags;
429         uchar   align;
430         uchar   al;
431         uchar   ty;
432         uchar   ty2;
433         Pstate* ps;
434         Pstate* nextps;
435         Pstate* outerps;
436         Table*  curtab;
437         Token*  tok;
438         Token*  toks;
439         Docinfo*        di;
440         Item*   ans;
441         Item*   img;
442         Item*   ffit;
443         Item*   tabitem;
444         Rune*   s;
445         Rune*   t;
446         Rune*   name;
447         Rune*   enctype;
448         Rune*   usemap;
449         Rune*   prompt;
450         Rune*   equiv;
451         Rune*   val;
452         Rune*   nsz;
453         Rune*   script;
454         Map*    map;
455         Form*   frm;
456         Iimage* ii;
457         Kidinfo*        kd;
458         Kidinfo*        ks;
459         Kidinfo*        pks;
460         Dimen   wd;
461         Option* option;
462         Table*  tab;
463         Tablecell*      c;
464         Tablerow*       tr;
465         Formfield*      field;
466         Formfield*      ff;
467         Rune*   href;
468         Rune*   src;
469         Rune*   scriptsrc;
470         Rune*   bgurl;
471         Rune*   action;
472         Background      bg;
473
474         if(!buildinited)
475                 buildinit();
476         doscripts = 0;  // for now
477         ps = is->psstk;
478         curtab = is->tabstk;
479         di = is->doc;
480         toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen);
481         toki = 0;
482         for(; toki < tokslen; toki++) {
483                 tok = &toks[toki];
484                 if(dbgbuild > 1)
485                         fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok);
486                 tag = tok->tag;
487                 brk = 0;
488                 brksp = 0;
489                 if(tag < Numtags) {
490                         brk = blockbrk[tag];
491                         if(brk&SPBefore)
492                                 brksp = 1;
493                 }
494                 else if(tag < Numtags + RBRA) {
495                         brk = blockbrk[tag - RBRA];
496                         if(brk&SPAfter)
497                                 brksp = 1;
498                 }
499                 if(brk) {
500                         addbrk(ps, brksp, 0);
501                         if(ps->inpar) {
502                                 popjust(ps);
503                                 ps->inpar = 0;
504                         }
505                 }
506                 // check common case first (Data), then switch statement on tag
507                 if(tag == Data) {
508                         // Lexing didn't pay attention to SGML record boundary rules:
509                         // \n after start tag or before end tag to be discarded.
510                         // (Lex has already discarded all \r's).
511                         // Some pages assume this doesn't happen in <PRE> text,
512                         // so we won't do it if literal is true.
513                         // BUG: won't discard \n before a start tag that begins
514                         // the next bufferful of tokens.
515                         s = tok->text;
516                         n = _Strlen(s);
517                         if(!ps->literal) {
518                                 i = 0;
519                                 j = n;
520                                 if(toki > 0) {
521                                         pt = toks[toki - 1].tag;
522                                         // IE and Netscape both ignore this rule (contrary to spec)
523                                         // if previous tag was img
524                                         if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
525                                                 i++;
526                                 }
527                                 if(toki < tokslen - 1) {
528                                         nt = toks[toki + 1].tag;
529                                         if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
530                                                 j--;
531                                 }
532                                 if(i > 0 || j < n) {
533                                         t = s;
534                                         s = _Strsubstr(s, i, j);
535                                         free(t);
536                                         n = j-i;
537                                 }
538                         }
539                         if(ps->skipwhite) {
540                                 _trimwhite(s, n, &t, &nt);
541                                 if(t == nil) {
542                                         free(s);
543                                         s = nil;
544                                 }
545                                 else if(t != s) {
546                                         t = _Strndup(t, nt);
547                                         free(s);
548                                         s = t;
549                                 }
550                                 if(s != nil)
551                                         ps->skipwhite = 0;
552                         }
553                         tok->text = nil;                // token doesn't own string anymore
554                         if(s != nil)
555                                 addtext(ps, s);
556                 }
557                 else
558                         switch(tag) {
559                         // Some abbrevs used in following DTD comments
560                         // %text =      #PCDATA
561                         //              | TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
562                         //              | EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
563                         //              | A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
564                         //              | INPUT | SELECT | TEXTAREA
565                         // %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
566                         //              | BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
567                         // %flow = (%text | %block)*
568                         // %body.content = (%heading | %text | %block | ADDRESS)*
569
570                         // <!ELEMENT A - - (%text) -(A)>
571                         // Anchors are not supposed to be nested, but you sometimes see
572                         // href anchors inside destination anchors.
573                         case Ta:
574                                 if(ps->curanchor != 0) {
575                                         if(warn)
576                                                 fprint(2, "warning: nested <A> or missing </A>\n");
577                                         ps->curanchor = 0;
578                                 }
579                                 name = aval(tok, Aname);
580                                 href = aurlval(tok, Ahref, nil, di->base);
581                                 // ignore rel, rev, and title attrs
582                                 if(href != nil) {
583                                         target = atargval(tok, di->target);
584                                         di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
585                                         if(name != nil)
586                                                 name = _Strdup(name);   // for DestAnchor construction, below
587                                         ps->curanchor = is->nanchors;
588                                         ps->curfg = push(&ps->fgstk, di->link);
589                                         ps->curul = push(&ps->ulstk, ULunder);
590                                 }
591                                 if(name != nil) {
592                                         // add a null item to be destination
593                                         additem(ps, newispacer(ISPnull), tok);
594                                         di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
595                                 }
596                                 break;
597
598                         case Ta+RBRA :
599                                 if(ps->curanchor != 0) {
600                                         ps->curfg = popretnewtop(&ps->fgstk, di->text);
601                                         ps->curul = popretnewtop(&ps->ulstk, ULnone);
602                                         ps->curanchor = 0;
603                                 }
604                                 break;
605
606                         // <!ELEMENT APPLET - - (PARAM | %text)* >
607                         // We can't do applets, so ignore PARAMS, and let
608                         // the %text contents appear for the alternative rep
609                         case Tapplet:
610                         case Tapplet+RBRA:
611                                 if(warn && tag == Tapplet)
612                                         fprint(2, "warning: <APPLET> ignored\n");
613                                 break;
614
615                         // <!ELEMENT AREA - O EMPTY>
616                         case Tarea:
617                                 map = di->maps;
618                                 if(map == nil) {
619                                         if(warn)
620                                                 fprint(2, "warning: <AREA> not inside <MAP>\n");
621                                         continue;
622                                 }
623                                 map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
624                                         aurlval(tok, Ahref, nil, di->base),
625                                         atargval(tok, di->target),
626                                         map->areas);
627                                 setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
628                                 break;
629
630                         // <!ELEMENT (B|STRONG) - - (%text)*>
631                         case Tb:
632                         case Tstrong:
633                                 pushfontstyle(ps, FntB);
634                                 break;
635
636                         case Tb+RBRA:
637                         case Tcite+RBRA:
638                         case Tcode+RBRA:
639                         case Tdfn+RBRA:
640                         case Tem+RBRA:
641                         case Tkbd+RBRA:
642                         case Ti+RBRA:
643                         case Tsamp+RBRA:
644                         case Tstrong+RBRA:
645                         case Ttt+RBRA:
646                         case Tvar+RBRA :
647                         case Taddress+RBRA:
648                                 popfontstyle(ps);
649                                 break;
650
651                         // <!ELEMENT BASE - O EMPTY>
652                         case Tbase:
653                                 t = di->base;
654                                 di->base = aurlval(tok, Ahref, di->base, di->base);
655                                 if(t != nil)
656                                         free(t);
657                                 di->target = atargval(tok, di->target);
658                                 break;
659
660                         // <!ELEMENT BASEFONT - O EMPTY>
661                         case Tbasefont:
662                                 ps->adjsize = aintval(tok, Asize, 3) - 3;
663                                 break;
664
665                         // <!ELEMENT (BIG|SMALL) - - (%text)*>
666                         case Tbig:
667                         case Tsmall:
668                                 sz = ps->adjsize;
669                                 if(tag == Tbig)
670                                         sz += Large;
671                                 else
672                                         sz += Small;
673                                 pushfontsize(ps, sz);
674                                 break;
675
676                         case Tbig+RBRA:
677                         case Tsmall+RBRA:
678                                 popfontsize(ps);
679                                 break;
680
681                         // <!ELEMENT BLOCKQUOTE - - %body.content>
682                         case Tblockquote:
683                                 changeindent(ps, BQTAB);
684                                 break;
685
686                         case Tblockquote+RBRA:
687                                 changeindent(ps, -BQTAB);
688                                 break;
689
690                         // <!ELEMENT BODY O O %body.content>
691                         case Tbody:
692                                 ps->skipping = 0;
693                                 bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
694                                 bgurl = aurlval(tok, Abackground, nil, di->base);
695                                 if(bgurl != nil) {
696                                         if(di->backgrounditem != nil)
697                                                 freeitem((Item*)di->backgrounditem);
698                                                 // really should remove old item from di->images list,
699                                                 // but there should only be one BODY element ...
700                                         di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
701                                         di->backgrounditem->nextimage = di->images;
702                                         di->images = di->backgrounditem;
703                                 }
704                                 ps->curbg = bg;
705                                 di->background = bg;
706                                 di->text = acolorval(tok, Atext, di->text);
707                                 di->link = acolorval(tok, Alink, di->link);
708                                 di->vlink = acolorval(tok, Avlink, di->vlink);
709                                 di->alink = acolorval(tok, Aalink, di->alink);
710                                 if(di->text != ps->curfg) {
711                                         ps->curfg = di->text;
712                                         ps->fgstk.n = 0;
713                                 }
714                                 break;
715
716                         case Tbody+RBRA:
717                                 // HTML spec says ignore things after </body>,
718                                 // but IE and Netscape don't
719                                 // ps->skipping = 1;
720                                 break;
721
722                         // <!ELEMENT BR - O EMPTY>
723                         case Tbr:
724                                 addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
725                                 break;
726
727                         // <!ELEMENT CAPTION - - (%text;)*>
728                         case Tcaption:
729                                 if(curtab == nil) {
730                                         if(warn)
731                                                 fprint(2, "warning: <CAPTION> outside <TABLE>\n");
732                                         continue;
733                                 }
734                                 if(curtab->caption != nil) {
735                                         if(warn)
736                                                 fprint(2, "warning: more than one <CAPTION> in <TABLE>\n");
737                                         continue;
738                                 }
739                                 ps = newpstate(ps);
740                                 curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop);
741                                 break;
742
743                         case Tcaption+RBRA:
744                                 nextps = ps->next;
745                                 if(curtab == nil || nextps == nil) {
746                                         if(warn)
747                                                 fprint(2, "warning: unexpected </CAPTION>\n");
748                                         continue;
749                                 }
750                                 curtab->caption = ps->items->next;
751                                 free(ps);
752                                 ps = nextps;
753                                 break;
754
755                         case Tcenter:
756                         case Tdiv:
757                                 if(tag == Tcenter)
758                                         al = ALcenter;
759                                 else
760                                         al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust);
761                                 pushjust(ps, al);
762                                 break;
763
764                         case Tcenter+RBRA:
765                         case Tdiv+RBRA:
766                                 popjust(ps);
767                                 break;
768
769                         // <!ELEMENT DD - O  %flow >
770                         case Tdd:
771                                 if(ps->hangstk.n == 0) {
772                                         if(warn)
773                                                 fprint(2, "warning: <DD> not inside <DL\n");
774                                         continue;
775                                 }
776                                 h = top(&ps->hangstk, 0);
777                                 if(h != 0)
778                                         changehang(ps, -10*LISTTAB);
779                                 else
780                                         addbrk(ps, 0, 0);
781                                 push(&ps->hangstk, 0);
782                                 break;
783
784                         //<!ELEMENT (DIR|MENU) - - (LI)+ -(%block) >
785                         //<!ELEMENT (OL|UL) - - (LI)+>
786                         case Tdir:
787                         case Tmenu:
788                         case Tol:
789                         case Tul:
790                                 changeindent(ps, LISTTAB);
791                                 push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc));
792                                 push(&ps->listcntstk, aintval(tok, Astart, 1));
793                                 break;
794
795                         case Tdir+RBRA:
796                         case Tmenu+RBRA:
797                         case Tol+RBRA:
798                         case Tul+RBRA:
799                                 if(ps->listtypestk.n == 0) {
800                                         if(warn)
801                                                 fprint(2, "warning: %T ended no list\n", tok);
802                                         continue;
803                                 }
804                                 addbrk(ps, 0, 0);
805                                 pop(&ps->listtypestk);
806                                 pop(&ps->listcntstk);
807                                 changeindent(ps, -LISTTAB);
808                                 break;
809
810                         // <!ELEMENT DL - - (DT|DD)+ >
811                         case Tdl:
812                                 changeindent(ps, LISTTAB);
813                                 push(&ps->hangstk, 0);
814                                 break;
815
816                         case Tdl+RBRA:
817                                 if(ps->hangstk.n == 0) {
818                                         if(warn)
819                                                 fprint(2, "warning: unexpected </DL>\n");
820                                         continue;
821                                 }
822                                 changeindent(ps, -LISTTAB);
823                                 if(top(&ps->hangstk, 0) != 0)
824                                         changehang(ps, -10*LISTTAB);
825                                 pop(&ps->hangstk);
826                                 break;
827
828                         // <!ELEMENT DT - O (%text)* >
829                         case Tdt:
830                                 if(ps->hangstk.n == 0) {
831                                         if(warn)
832                                                 fprint(2, "warning: <DT> not inside <DL>\n");
833                                         continue;
834                                 }
835                                 h = top(&ps->hangstk, 0);
836                                 pop(&ps->hangstk);
837                                 if(h != 0)
838                                         changehang(ps, -10*LISTTAB);
839                                 changehang(ps, 10*LISTTAB);
840                                 push(&ps->hangstk, 1);
841                                 break;
842
843                         // <!ELEMENT FONT - - (%text)*>
844                         case Tfont:
845                                 sz = top(&ps->fntsizestk, Normal);
846                                 if(_tokaval(tok, Asize, &nsz, 0)) {
847                                         if(_prefix(L"+", nsz))
848                                                 sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize;
849                                         else if(_prefix(L"-", nsz))
850                                                 sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize;
851                                         else if(nsz != nil)
852                                                 sz = Normal + (_Strtol(nsz, nil, 10) - 3);
853                                 }
854                                 ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg));
855                                 pushfontsize(ps, sz);
856                                 break;
857
858                         case Tfont+RBRA:
859                                 if(ps->fgstk.n == 0) {
860                                         if(warn)
861                                                 fprint(2, "warning: unexpected </FONT>\n");
862                                         continue;
863                                 }
864                                 ps->curfg = popretnewtop(&ps->fgstk, di->text);
865                                 popfontsize(ps);
866                                 break;
867
868                         // <!ELEMENT FORM - - %body.content -(FORM) >
869                         case Tform:
870                                 if(is->curform != nil) {
871                                         if(warn)
872                                                 fprint(2, "warning: <FORM> nested inside another\n");
873                                         continue;
874                                 }
875                                 action = aurlval(tok, Aaction, di->base, di->base);
876                                 s = aval(tok, Aid);
877                                 name = astrval(tok, Aname, s);
878                                 if(s)
879                                         free(s);
880                                 target = atargval(tok, di->target);
881                                 method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet);
882                                 if(warn && _tokaval(tok, Aenctype, &enctype, 0) &&
883                                                 _Strcmp(enctype, L"application/x-www-form-urlencoded"))
884                                         fprint(2, "form enctype %S not handled\n", enctype);
885                                 frm = newform(++is->nforms, name, action, target, method, di->forms);
886                                 di->forms = frm;
887                                 is->curform = frm;
888                                 break;
889
890                         case Tform+RBRA:
891                                 if(is->curform == nil) {
892                                         if(warn)
893                                                 fprint(2, "warning: unexpected </FORM>\n");
894                                         continue;
895                                 }
896                                 // put fields back in input order
897                                 is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields);
898                                 is->curform = nil;
899                                 break;
900
901                         // <!ELEMENT FRAME - O EMPTY>
902                         case Tframe:
903                                 ks = is->kidstk;
904                                 if(ks == nil) {
905                                         if(warn)
906                                                 fprint(2, "warning: <FRAME> not in <FRAMESET>\n");
907                                         continue;
908                                 }
909                                 ks->kidinfos = kd = newkidinfo(0, ks->kidinfos);
910                                 kd->src = aurlval(tok, Asrc, nil, di->base);
911                                 kd->name = aval(tok, Aname);
912                                 if(kd->name == nil)
913                                         kd->name = runesmprint("_fr%d", ++is->nframes);
914                                 kd->marginw = auintval(tok, Amarginwidth, 0);
915                                 kd->marginh = auintval(tok, Amarginheight, 0);
916                                 kd->framebd = auintval(tok, Aframeborder, 1);
917                                 kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags);
918                                 norsz = aflagval(tok, Anoresize);
919                                 if(norsz)
920                                         kd->flags |= FRnoresize;
921                                 break;
922
923                         // <!ELEMENT FRAMESET - - (FRAME|FRAMESET)+>
924                         case Tframeset:
925                                 ks = newkidinfo(1, nil);
926                                 pks = is->kidstk;
927                                 if(pks == nil)
928                                         di->kidinfo = ks;
929                                 else  {
930                                         ks->next = pks->kidinfos;
931                                         pks->kidinfos = ks;
932                                 }
933                                 ks->nextframeset = pks;
934                                 is->kidstk = ks;
935                                 setdimarray(tok, Arows, &ks->rows, &ks->nrows);
936                                 if(ks->nrows == 0) {
937                                         ks->rows = (Dimen*)emalloc(sizeof(Dimen));
938                                         ks->nrows = 1;
939                                         ks->rows[0] = makedimen(Dpercent, 100);
940                                 }
941                                 setdimarray(tok, Acols, &ks->cols, &ks->ncols);
942                                 if(ks->ncols == 0) {
943                                         ks->cols = (Dimen*)emalloc(sizeof(Dimen));
944                                         ks->ncols = 1;
945                                         ks->cols[0] = makedimen(Dpercent, 100);
946                                 }
947                                 break;
948
949                         case Tframeset+RBRA:
950                                 if(is->kidstk == nil) {
951                                         if(warn)
952                                                 fprint(2, "warning: unexpected </FRAMESET>\n");
953                                         continue;
954                                 }
955                                 ks = is->kidstk;
956                                 // put kids back in original order
957                                 // and add blank frames to fill out cells
958                                 n = ks->nrows*ks->ncols;
959                                 nblank = n - _listlen((List*)ks->kidinfos);
960                                 while(nblank-- > 0)
961                                         ks->kidinfos = newkidinfo(0, ks->kidinfos);
962                                 ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos);
963                                 is->kidstk = is->kidstk->nextframeset;
964                                 if(is->kidstk == nil) {
965                                         // end input
966                                         ans = nil;
967                                         goto return_ans;
968                                 }
969                                 break;
970
971                         // <!ELEMENT H1 - - (%text;)*>, etc.
972                         case Th1:
973                         case Th2:
974                         case Th3:
975                         case Th4:
976                         case Th5:
977                         case Th6:
978                                 bramt = 1;
979                                 if(ps->items == ps->lastit)
980                                         bramt = 0;
981                                 addbrk(ps, bramt, IFcleft|IFcright);
982                                 sz = Verylarge - (tag - Th1);
983                                 if(sz < Tiny)
984                                         sz = Tiny;
985                                 pushfontsize(ps, sz);
986                                 sty = top(&ps->fntstylestk, FntR);
987                                 if(tag == Th1)
988                                         sty = FntB;
989                                 pushfontstyle(ps, sty);
990                                 pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
991                                 ps->skipwhite = 1;
992                                 break;
993
994                         case Th1+RBRA:
995                         case Th2+RBRA:
996                         case Th3+RBRA:
997                         case Th4+RBRA:
998                         case Th5+RBRA:
999                         case Th6+RBRA:
1000                                 addbrk(ps, 1, IFcleft|IFcright);
1001                                 popfontsize(ps);
1002                                 popfontstyle(ps);
1003                                 popjust(ps);
1004                                 break;
1005
1006                         case Thead:
1007                                 // HTML spec says ignore regular markup in head,
1008                                 // but Netscape and IE don't
1009                                 // ps->skipping = 1;
1010                                 break;
1011
1012                         case Thead+RBRA:
1013                                 ps->skipping = 0;
1014                                 break;
1015
1016                         // <!ELEMENT HR - O EMPTY>
1017                         case Thr:
1018                                 al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter);
1019                                 sz = auintval(tok, Asize, HRSZ);
1020                                 wd = adimen(tok, Awidth);
1021                                 if(dimenkind(wd) == Dnone)
1022                                         wd = makedimen(Dpercent, 100);
1023                                 nosh = aflagval(tok, Anoshade);
1024                                 color = acolorval(tok, Acolor, 0);
1025                                 additem(ps, newirule(al, sz, nosh, color, wd), tok);
1026                                 addbrk(ps, 0, 0);
1027                                 break;
1028
1029                         case Ti:
1030                         case Tcite:
1031                         case Tdfn:
1032                         case Tem:
1033                         case Tvar:
1034                         case Taddress:
1035                                 pushfontstyle(ps, FntI);
1036                                 break;
1037
1038                         // <!ELEMENT IMG - O EMPTY>
1039                         case Timg:
1040                                 map = nil;
1041                                 oldcuranchor = ps->curanchor;
1042                                 if(_tokaval(tok, Ausemap, &usemap, 0)) {
1043                                         if(!_prefix(L"#", usemap)) {
1044                                                 if(warn)
1045                                                         fprint(2, "warning: can't handle non-local map %S\n", usemap);
1046                                         }
1047                                         else {
1048                                                 map = getmap(di, usemap+1);
1049                                                 if(ps->curanchor == 0) {
1050                                                         di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors);
1051                                                         ps->curanchor = is->nanchors;
1052                                                 }
1053                                         }
1054                                 }
1055                                 align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom);
1056                                 dfltbd = 0;
1057                                 if(ps->curanchor != 0)
1058                                         dfltbd = 2;
1059                                 src = aurlval(tok, Asrc, nil, di->base);
1060                                 if(src == nil) {
1061                                         if(warn)
1062                                                 fprint(2, "warning: <img> has no src attribute\n");
1063                                         ps->curanchor = oldcuranchor;
1064                                         continue;
1065                                 }
1066                                 img = newiimage(src,
1067                                                 aval(tok, Aalt),
1068                                                 align,
1069                                                 auintval(tok, Awidth, 0),
1070                                                 auintval(tok, Aheight, 0),
1071                                                 auintval(tok, Ahspace, IMGHSPACE),
1072                                                 auintval(tok, Avspace, IMGVSPACE),
1073                                                 auintval(tok, Aborder, dfltbd),
1074                                                 aflagval(tok, Aismap),
1075                                                 map);
1076                                 if(align == ALleft || align == ALright) {
1077                                         additem(ps, newifloat(img, align), tok);
1078                                         // if no hspace specified, use FLTIMGHSPACE
1079                                         if(!_tokaval(tok, Ahspace, &val, 0))
1080                                                 ((Iimage*)img)->hspace = FLTIMGHSPACE;
1081                                 }
1082                                 else {
1083                                         ps->skipwhite = 0;
1084                                         additem(ps, img, tok);
1085                                 }
1086                                 if(!ps->skipping) {
1087                                         ((Iimage*)img)->nextimage = di->images;
1088                                         di->images = (Iimage*)img;
1089                                 }
1090                                 ps->curanchor = oldcuranchor;
1091                                 break;
1092
1093                         // <!ELEMENT INPUT - O EMPTY>
1094                         case Tinput:
1095                                 ps->skipwhite = 0;
1096                                 if(is->curform == nil) {
1097                                         if(warn)
1098                                                 fprint(2, "<INPUT> not inside <FORM>\n");
1099                                         continue;
1100                                 }
1101                                 is->curform->fields = field = newformfield(
1102                                                 atabval(tok, Atype, input_tab, NINPUTTAB, Ftext),
1103                                                 ++is->curform->nfields,
1104                                                 is->curform,
1105                                                 aval(tok, Aname),
1106                                                 aval(tok, Avalue),
1107                                                 auintval(tok, Asize, 0),
1108                                                 auintval(tok, Amaxlength, 1000),
1109                                                 is->curform->fields);
1110                                 if(aflagval(tok, Achecked))
1111                                         field->flags = FFchecked;
1112
1113                                 switch(field->ftype) {
1114                                 case Ftext:
1115                                 case Fpassword:
1116                                 case Ffile:
1117                                         if(field->size == 0)
1118                                                 field->size = 20;
1119                                         break;
1120
1121                                 case Fcheckbox:
1122                                         if(field->name == nil) {
1123                                                 if(warn)
1124                                                         fprint(2, "warning: checkbox form field missing name\n");
1125                                                 continue;
1126                                         }
1127                                         if(field->value == nil)
1128                                                 field->value = _Strdup(L"1");
1129                                         break;
1130
1131                                 case Fradio:
1132                                         if(field->name == nil || field->value == nil) {
1133                                                 if(warn)
1134                                                         fprint(2, "warning: radio form field missing name or value\n");
1135                                                 continue;
1136                                         }
1137                                         break;
1138
1139                                 case Fsubmit:
1140                                         if(field->value == nil)
1141                                                 field->value = _Strdup(L"Submit");
1142                                         if(field->name == nil)
1143                                                 field->name = _Strdup(L"_no_name_submit_");
1144                                         break;
1145
1146                                 case Fimage:
1147                                         src = aurlval(tok, Asrc, nil, di->base);
1148                                         if(src == nil) {
1149                                                 if(warn)
1150                                                         fprint(2, "warning: image form field missing src\n");
1151                                                 continue;
1152                                         }
1153                                         // width and height attrs aren't specified in HTML 3.2,
1154                                         // but some people provide them and they help avoid
1155                                         // a relayout
1156                                         field->image = newiimage(src,
1157                                                 astrval(tok, Aalt, L"Submit"),
1158                                                 atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom),
1159                                                 auintval(tok, Awidth, 0), auintval(tok, Aheight, 0),
1160                                                 0, 0, 0, 0, nil);
1161                                         ii = (Iimage*)field->image;
1162                                         ii->nextimage = di->images;
1163                                         di->images = ii;
1164                                         break;
1165
1166                                 case Freset:
1167                                         if(field->value == nil)
1168                                                 field->value = _Strdup(L"Reset");
1169                                         break;
1170
1171                                 case Fbutton:
1172                                         if(field->value == nil)
1173                                                 field->value = _Strdup(L" ");
1174                                         break;
1175                                 }
1176                                 ffit = newiformfield(field);
1177                                 additem(ps, ffit, tok);
1178                                 if(ffit->genattr != nil)
1179                                         field->events = ffit->genattr->events;
1180                                 break;
1181
1182                         // <!ELEMENT ISINDEX - O EMPTY>
1183                         case Tisindex:
1184                                 ps->skipwhite = 0;
1185                                 prompt = astrval(tok, Aprompt, L"Index search terms:");
1186                                 target = atargval(tok, di->target);
1187                                 additem(ps, textit(ps, prompt), tok);
1188                                 frm = newform(++is->nforms,
1189                                                 nil,
1190                                                 di->base,
1191                                                 target,
1192                                                 HGet,
1193                                                 di->forms);
1194                                 di->forms = frm;
1195                                 ff = newformfield(Ftext,
1196                                                 1,
1197                                                 frm,
1198                                                 _Strdup(L"_ISINDEX_"),
1199                                                 nil,
1200                                                 50,
1201                                                 1000,
1202                                                 nil);
1203                                 frm->fields = ff;
1204                                 frm->nfields = 1;
1205                                 additem(ps, newiformfield(ff), tok);
1206                                 addbrk(ps, 1, 0);
1207                                 break;
1208
1209                         // <!ELEMENT LI - O %flow>
1210                         case Tli:
1211                                 if(ps->listtypestk.n == 0) {
1212                                         if(warn)
1213                                                 fprint(2, "<LI> not in list\n");
1214                                         continue;
1215                                 }
1216                                 ty = top(&ps->listtypestk, 0);
1217                                 ty2 = listtyval(tok, ty);
1218                                 if(ty != ty2) {
1219                                         ty = ty2;
1220                                         push(&ps->listtypestk, ty2);
1221                                 }
1222                                 v = aintval(tok, Avalue, top(&ps->listcntstk, 1));
1223                                 if(ty == LTdisc || ty == LTsquare || ty == LTcircle)
1224                                         hang = 10*LISTTAB - 3;
1225                                 else
1226                                         hang = 10*LISTTAB - 1;
1227                                 changehang(ps, hang);
1228                                 addtext(ps, listmark(ty, v));
1229                                 push(&ps->listcntstk, v + 1);
1230                                 changehang(ps, -hang);
1231                                 ps->skipwhite = 1;
1232                                 break;
1233
1234                         // <!ELEMENT MAP - - (AREA)+>
1235                         case Tmap:
1236                                 if(_tokaval(tok, Aname, &name, 0))
1237                                         is->curmap = getmap(di, name);
1238                                 break;
1239
1240                         case Tmap+RBRA:
1241                                 map = is->curmap;
1242                                 if(map == nil) {
1243                                         if(warn)
1244                                                 fprint(2, "warning: unexpected </MAP>\n");
1245                                         continue;
1246                                 }
1247                                 map->areas = (Area*)_revlist((List*)map->areas);
1248                                 break;
1249
1250                         case Tmeta:
1251                                 if(ps->skipping)
1252                                         continue;
1253                                 if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) {
1254                                         val = aval(tok, Acontent);
1255                                         n = _Strlen(equiv);
1256                                         if(!_Strncmpci(equiv, n, L"refresh"))
1257                                                 di->refresh = val;
1258                                         else if(!_Strncmpci(equiv, n, L"content-script-type")) {
1259                                                 n = _Strlen(val);
1260                                                 if(!_Strncmpci(val, n, L"javascript")
1261                                                    || !_Strncmpci(val, n, L"jscript1.1")
1262                                                    || !_Strncmpci(val, n, L"jscript"))
1263                                                         di->scripttype = TextJavascript;
1264                                                 else {
1265                                                         if(warn)
1266                                                                 fprint(2, "unimplemented script type %S\n", val);
1267                                                         di->scripttype = UnknownType;
1268                                                 }
1269                                         }
1270                                 }
1271                                 break;
1272
1273                         // Nobr is NOT in HTML 4.0, but it is ubiquitous on the web
1274                         case Tnobr:
1275                                 ps->skipwhite = 0;
1276                                 ps->curstate &= ~IFwrap;
1277                                 break;
1278
1279                         case Tnobr+RBRA:
1280                                 ps->curstate |= IFwrap;
1281                                 break;
1282
1283                         // We do frames, so skip stuff in noframes
1284                         case Tnoframes:
1285                                 ps->skipping = 1;
1286                                 break;
1287
1288                         case Tnoframes+RBRA:
1289                                 ps->skipping = 0;
1290                                 break;
1291
1292                         // We do scripts (if enabled), so skip stuff in noscripts
1293                         case Tnoscript:
1294                                 if(doscripts)
1295                                         ps->skipping = 1;
1296                                 break;
1297
1298                         case Tnoscript+RBRA:
1299                                 if(doscripts)
1300                                         ps->skipping = 0;
1301                                 break;
1302
1303                         // <!ELEMENT OPTION - O (#PCDATA)>
1304                         case Toption:
1305                                 if(is->curform == nil || is->curform->fields == nil) {
1306                                         if(warn)
1307                                                 fprint(2, "warning: <OPTION> not in <SELECT>\n");
1308                                         continue;
1309                                 }
1310                                 field = is->curform->fields;
1311                                 if(field->ftype != Fselect) {
1312                                         if(warn)
1313                                                 fprint(2, "warning: <OPTION> not in <SELECT>\n");
1314                                         continue;
1315                                 }
1316                                 val = aval(tok, Avalue);
1317                                 option = newoption(aflagval(tok, Aselected), val, nil, field->options);
1318                                 field->options = option;
1319                                 option->display =  getpcdata(toks, tokslen, &toki);
1320                                 if(val == nil)
1321                                         option->value = _Strdup(option->display);
1322                                 break;
1323
1324                         // <!ELEMENT P - O (%text)* >
1325                         case Tp:
1326                                 pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
1327                                 ps->inpar = 1;
1328                                 ps->skipwhite = 1;
1329                                 break;
1330
1331                         case Tp+RBRA:
1332                                 break;
1333
1334                         // <!ELEMENT PARAM - O EMPTY>
1335                         // Do something when we do applets...
1336                         case Tparam:
1337                                 break;
1338
1339                         // <!ELEMENT PRE - - (%text)* -(IMG|BIG|SMALL|SUB|SUP|FONT) >
1340                         case Tpre:
1341                                 ps->curstate &= ~IFwrap;
1342                                 ps->literal = 1;
1343                                 ps->skipwhite = 0;
1344                                 pushfontstyle(ps, FntT);
1345                                 break;
1346
1347                         case Tpre+RBRA:
1348                                 ps->curstate |= IFwrap;
1349                                 if(ps->literal) {
1350                                         popfontstyle(ps);
1351                                         ps->literal = 0;
1352                                 }
1353                                 break;
1354
1355                         // <!ELEMENT SCRIPT - - CDATA>
1356                         case Tscript:
1357                                 if(doscripts) {
1358                                         if(!di->hasscripts) {
1359                                                 if(di->scripttype == TextJavascript) {
1360                                                         // TODO: initialize script if nec.
1361                                                         // initjscript(di);
1362                                                         di->hasscripts = 1;
1363                                                 }
1364                                         }
1365                                 }
1366                                 if(!di->hasscripts) {
1367                                         if(warn)
1368                                                 fprint(2, "warning: <SCRIPT> ignored\n");
1369                                         ps->skipping = 1;
1370                                 }
1371                                 else {
1372                                         scriptsrc = aurlval(tok, Asrc, nil, di->base);
1373                                         script = nil;
1374                                         if(scriptsrc != nil) {
1375                                                 if(warn)
1376                                                         fprint(2, "warning: non-local <SCRIPT> ignored\n");
1377                                                 free(scriptsrc);
1378                                         }
1379                                         else {
1380                                                 script = getpcdata(toks, tokslen, &toki);
1381                                         }
1382                                         if(script != nil) {
1383                                                 if(warn)
1384                                                         fprint(2, "script ignored\n");
1385                                                 free(script);
1386                                         }
1387                                 }
1388                                 break;
1389
1390                         case Tscript+RBRA:
1391                                 ps->skipping = 0;
1392                                 break;
1393
1394                         // <!ELEMENT SELECT - - (OPTION+)>
1395                         case Tselect:
1396                                 if(is->curform == nil) {
1397                                         if(warn)
1398                                                 fprint(2, "<SELECT> not inside <FORM>\n");
1399                                         continue;
1400                                 }
1401                                 field = newformfield(Fselect,
1402                                         ++is->curform->nfields,
1403                                         is->curform,
1404                                         aval(tok, Aname),
1405                                         nil,
1406                                         auintval(tok, Asize, 0),
1407                                         0,
1408                                         is->curform->fields);
1409                                 is->curform->fields = field;
1410                                 if(aflagval(tok, Amultiple))
1411                                         field->flags = FFmultiple;
1412                                 ffit = newiformfield(field);
1413                                 additem(ps, ffit, tok);
1414                                 if(ffit->genattr != nil)
1415                                         field->events = ffit->genattr->events;
1416                                 // throw away stuff until next tag (should be <OPTION>)
1417                                 s = getpcdata(toks, tokslen, &toki);
1418                                 if(s != nil)
1419                                         free(s);
1420                                 break;
1421
1422                         case Tselect+RBRA:
1423                                 if(is->curform == nil || is->curform->fields == nil) {
1424                                         if(warn)
1425                                                 fprint(2, "warning: unexpected </SELECT>\n");
1426                                         continue;
1427                                 }
1428                                 field = is->curform->fields;
1429                                 if(field->ftype != Fselect)
1430                                         continue;
1431                                 // put options back in input order
1432                                 field->options = (Option*)_revlist((List*)field->options);
1433                                 break;
1434
1435                         // <!ELEMENT (STRIKE|U) - - (%text)*>
1436                         case Tstrike:
1437                         case Tu:
1438                                 ps->curul = push(&ps->ulstk, (tag==Tstrike)? ULmid : ULunder);
1439                                 break;
1440
1441                         case Tstrike+RBRA:
1442                         case Tu+RBRA:
1443                                 if(ps->ulstk.n == 0) {
1444                                         if(warn)
1445                                                 fprint(2, "warning: unexpected %T\n", tok);
1446                                         continue;
1447                                 }
1448                                 ps->curul = popretnewtop(&ps->ulstk, ULnone);
1449                                 break;
1450
1451                         // <!ELEMENT STYLE - - CDATA>
1452                         case Tstyle:
1453                                 if(warn)
1454                                         fprint(2, "warning: unimplemented <STYLE>\n");
1455                                 ps->skipping = 1;
1456                                 break;
1457
1458                         case Tstyle+RBRA:
1459                                 ps->skipping = 0;
1460                                 break;
1461
1462                         // <!ELEMENT (SUB|SUP) - - (%text)*>
1463                         case Tsub:
1464                         case Tsup:
1465                                 if(tag == Tsub)
1466                                         ps->curvoff += SUBOFF;
1467                                 else
1468                                         ps->curvoff -= SUPOFF;
1469                                 push(&ps->voffstk, ps->curvoff);
1470                                 sz = top(&ps->fntsizestk, Normal);
1471                                 pushfontsize(ps, sz - 1);
1472                                 break;
1473
1474                         case Tsub+RBRA:
1475                         case Tsup+RBRA:
1476                                 if(ps->voffstk.n == 0) {
1477                                         if(warn)
1478                                                 fprint(2, "warning: unexpected %T\n", tok);
1479                                         continue;
1480                                 }
1481                                 ps->curvoff = popretnewtop(&ps->voffstk, 0);
1482                                 popfontsize(ps);
1483                                 break;
1484
1485                         // <!ELEMENT TABLE - - (CAPTION?, TR+)>
1486                         case Ttable:
1487                                 ps->skipwhite = 0;
1488                                 tab = newtable(++is->ntables,
1489                                                 aalign(tok),
1490                                                 adimen(tok, Awidth),
1491                                                 aflagval(tok, Aborder), 
1492                                                 auintval(tok, Acellspacing, TABSP),
1493                                                 auintval(tok, Acellpadding, TABPAD),
1494                                                 makebackground(nil, acolorval(tok, Abgcolor, ps->curbg.color)),
1495                                                 tok,
1496                                                 is->tabstk);
1497                                 is->tabstk = tab;
1498                                 curtab = tab;
1499                                 break;
1500
1501                         case Ttable+RBRA:
1502                                 if(curtab == nil) {
1503                                         if(warn)
1504                                                 fprint(2, "warning: unexpected </TABLE>\n");
1505                                         continue;
1506                                 }
1507                                 isempty = (curtab->cells == nil);
1508                                 if(isempty) {
1509                                         if(warn)
1510                                                 fprint(2, "warning: <TABLE> has no cells\n");
1511                                 }
1512                                 else {
1513                                         ps = finishcell(curtab, ps);
1514                                         if(curtab->rows != nil)
1515                                                 curtab->rows->flags = 0;
1516                                         finish_table(curtab);
1517                                 }
1518                                 ps->skipping = 0;
1519                                 if(!isempty) {
1520                                         tabitem = newitable(curtab);
1521                                         al = curtab->align.halign;
1522                                         switch(al) {
1523                                         case ALleft:
1524                                         case ALright:
1525                                                 additem(ps, newifloat(tabitem, al), tok);
1526                                                 break;
1527                                         default:
1528                                                 if(al == ALcenter)
1529                                                         pushjust(ps, ALcenter);
1530                                                 addbrk(ps, 0, 0);
1531                                                 if(ps->inpar) {
1532                                                         popjust(ps);
1533                                                         ps->inpar = 0;
1534                                                 }
1535                                                 additem(ps, tabitem, curtab->tabletok);
1536                                                 if(al == ALcenter)
1537                                                         popjust(ps);
1538                                                 break;
1539                                         }
1540                                 }
1541                                 if(is->tabstk == nil) {
1542                                         if(warn)
1543                                                 fprint(2, "warning: table stack is wrong\n");
1544                                 }
1545                                 else
1546                                         is->tabstk = is->tabstk->next;
1547                                 curtab->next = di->tables;
1548                                 di->tables = curtab;
1549                                 curtab = is->tabstk;
1550                                 if(!isempty)
1551                                         addbrk(ps, 0, 0);
1552                                 break;
1553
1554                         // <!ELEMENT (TH|TD) - O %body.content>
1555                         // Cells for a row are accumulated in reverse order.
1556                         // We push ps on a stack, and use a new one to accumulate
1557                         // the contents of the cell.
1558                         case Ttd:
1559                         case Tth:
1560                                 if(curtab == nil) {
1561                                         if(warn)
1562                                                 fprint(2, "%T outside <TABLE>\n", tok);
1563                                         continue;
1564                                 }
1565                                 if(ps->inpar) {
1566                                         popjust(ps);
1567                                         ps->inpar = 0;
1568                                 }
1569                                 ps = finishcell(curtab, ps);
1570                                 tr = nil;
1571                                 if(curtab->rows != nil)
1572                                         tr = curtab->rows;
1573                                 if(tr == nil || !tr->flags) {
1574                                         if(warn)
1575                                                 fprint(2, "%T outside row\n", tok);
1576                                         tr = newtablerow(makealign(ALnone, ALnone),
1577                                                         makebackground(nil, curtab->background.color),
1578                                                         TFparsing,
1579                                                         curtab->rows);
1580                                         curtab->rows = tr;
1581                                 }
1582                                 ps = cell_pstate(ps, tag == Tth);
1583                                 flags = TFparsing;
1584                                 if(aflagval(tok, Anowrap)) {
1585                                         flags |= TFnowrap;
1586                                         ps->curstate &= ~IFwrap;
1587                                 }
1588                                 if(tag == Tth)
1589                                         flags |= TFisth;
1590                                 c = newtablecell(curtab->cells==nil? 1 : curtab->cells->cellid+1,
1591                                                 auintval(tok, Arowspan, 1),
1592                                                 auintval(tok, Acolspan, 1), 
1593                                                 aalign(tok), 
1594                                                 adimen(tok, Awidth),
1595                                                 auintval(tok, Aheight, 0),
1596                                                 makebackground(nil, acolorval(tok, Abgcolor, tr->background.color)),
1597                                                 flags,
1598                                                 curtab->cells);
1599                                 curtab->cells = c;
1600                                 ps->curbg = c->background;
1601                                 if(c->align.halign == ALnone) {
1602                                         if(tr->align.halign != ALnone)
1603                                                 c->align.halign = tr->align.halign;
1604                                         else if(tag == Tth)
1605                                                 c->align.halign = ALcenter;
1606                                         else
1607                                                 c->align.halign = ALleft;
1608                                 }
1609                                 if(c->align.valign == ALnone) {
1610                                         if(tr->align.valign != ALnone)
1611                                                 c->align.valign = tr->align.valign;
1612                                         else
1613                                                 c->align.valign = ALmiddle;
1614                                 }
1615                                 c->nextinrow = tr->cells;
1616                                 tr->cells = c;
1617                                 break;
1618
1619                         case Ttd+RBRA:
1620                         case Tth+RBRA:
1621                                 if(curtab == nil || curtab->cells == nil) {
1622                                         if(warn)
1623                                                 fprint(2, "unexpected %T\n", tok);
1624                                         continue;
1625                                 }
1626                                 ps = finishcell(curtab, ps);
1627                                 break;
1628
1629                         // <!ELEMENT TEXTAREA - - (#PCDATA)>
1630                         case Ttextarea:
1631                                 if(is->curform == nil) {
1632                                         if(warn)
1633                                                 fprint(2, "<TEXTAREA> not inside <FORM>\n");
1634                                         continue;
1635                                 }
1636                                 field = newformfield(Ftextarea,
1637                                         ++is->curform->nfields,
1638                                         is->curform,
1639                                         aval(tok, Aname),
1640                                         nil,
1641                                         0,
1642                                         0,
1643                                         is->curform->fields);
1644                                 is->curform->fields = field;
1645                                 field->rows = auintval(tok, Arows, 3);
1646                                 field->cols = auintval(tok, Acols, 50);
1647                                 field->value = getpcdata(toks, tokslen, &toki);
1648                                 if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttextarea + RBRA)
1649                                         fprint(2, "warning: <TEXTAREA> data ended by %T\n", &toks[toki + 1]);
1650                                 ffit = newiformfield(field);
1651                                 additem(ps, ffit, tok);
1652                                 if(ffit->genattr != nil)
1653                                         field->events = ffit->genattr->events;
1654                                 break;
1655
1656                         // <!ELEMENT TITLE - - (#PCDATA)* -(%head.misc)>
1657                         case Ttitle:
1658                                 di->doctitle = getpcdata(toks, tokslen, &toki);
1659                                 if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttitle + RBRA)
1660                                         fprint(2, "warning: <TITLE> data ended by %T\n", &toks[toki + 1]);
1661                                 break;
1662
1663                         // <!ELEMENT TR - O (TH|TD)+>
1664                         // rows are accumulated in reverse order in curtab->rows
1665                         case Ttr:
1666                                 if(curtab == nil) {
1667                                         if(warn)
1668                                                 fprint(2, "warning: <TR> outside <TABLE>\n");
1669                                         continue;
1670                                 }
1671                                 if(ps->inpar) {
1672                                         popjust(ps);
1673                                         ps->inpar = 0;
1674                                 }
1675                                 ps = finishcell(curtab, ps);
1676                                 if(curtab->rows != nil)
1677                                         curtab->rows->flags = 0;
1678                                 curtab->rows = newtablerow(aalign(tok),
1679                                         makebackground(nil, acolorval(tok, Abgcolor, curtab->background.color)),
1680                                         TFparsing,
1681                                         curtab->rows);
1682                                 break;
1683
1684                         case Ttr+RBRA:
1685                                 if(curtab == nil || curtab->rows == nil) {
1686                                         if(warn)
1687                                                 fprint(2, "warning: unexpected </TR>\n");
1688                                         continue;
1689                                 }
1690                                 ps = finishcell(curtab, ps);
1691                                 tr = curtab->rows;
1692                                 if(tr->cells == nil) {
1693                                         if(warn)
1694                                                 fprint(2, "warning: empty row\n");
1695                                         curtab->rows = tr->next;
1696                                         tr->next = nil;
1697                                 }
1698                                 else
1699                                         tr->flags = 0;
1700                                 break;
1701
1702                         // <!ELEMENT (TT|CODE|KBD|SAMP) - - (%text)*>
1703                         case Ttt:
1704                         case Tcode:
1705                         case Tkbd:
1706                         case Tsamp:
1707                                 pushfontstyle(ps, FntT);
1708                                 break;
1709
1710                         // Tags that have empty action
1711                         case Tabbr:
1712                         case Tabbr+RBRA:
1713                         case Tacronym:
1714                         case Tacronym+RBRA:
1715                         case Tarea+RBRA:
1716                         case Tbase+RBRA:
1717                         case Tbasefont+RBRA:
1718                         case Tbr+RBRA:
1719                         case Tdd+RBRA:
1720                         case Tdt+RBRA:
1721                         case Tframe+RBRA:
1722                         case Thr+RBRA:
1723                         case Thtml:
1724                         case Thtml+RBRA:
1725                         case Timg+RBRA:
1726                         case Tinput+RBRA:
1727                         case Tisindex+RBRA:
1728                         case Tli+RBRA:
1729                         case Tlink:
1730                         case Tlink+RBRA:
1731                         case Tmeta+RBRA:
1732                         case Toption+RBRA:
1733                         case Tparam+RBRA:
1734                         case Ttextarea+RBRA:
1735                         case Ttitle+RBRA:
1736                                 break;
1737
1738
1739                         // Tags not implemented
1740                         case Tbdo:
1741                         case Tbdo+RBRA:
1742                         case Tbutton:
1743                         case Tbutton+RBRA:
1744                         case Tdel:
1745                         case Tdel+RBRA:
1746                         case Tfieldset:
1747                         case Tfieldset+RBRA:
1748                         case Tiframe:
1749                         case Tiframe+RBRA:
1750                         case Tins:
1751                         case Tins+RBRA:
1752                         case Tlabel:
1753                         case Tlabel+RBRA:
1754                         case Tlegend:
1755                         case Tlegend+RBRA:
1756                         case Tobject:
1757                         case Tobject+RBRA:
1758                         case Toptgroup:
1759                         case Toptgroup+RBRA:
1760                         case Tspan:
1761                         case Tspan+RBRA:
1762                                 if(warn) {
1763                                         if(tag > RBRA)
1764                                                 tag -= RBRA;
1765                                         fprint(2, "warning: unimplemented HTML tag: %S\n", tagnames[tag]);
1766                                 }
1767                                 break;
1768
1769                         default:
1770                                 if(warn)
1771                                         fprint(2, "warning: unknown HTML tag: %S\n", tok->text);
1772                                 break;
1773                         }
1774         }
1775         // some pages omit trailing </table>
1776         while(curtab != nil) {
1777                 if(warn)
1778                         fprint(2, "warning: <TABLE> not closed\n");
1779                 if(curtab->cells != nil) {
1780                         ps = finishcell(curtab, ps);
1781                         if(curtab->cells == nil) {
1782                                 if(warn)
1783                                         fprint(2, "warning: empty table\n");
1784                         }
1785                         else {
1786                                 if(curtab->rows != nil)
1787                                         curtab->rows->flags = 0;
1788                                 finish_table(curtab);
1789                                 ps->skipping = 0;
1790                                 additem(ps, newitable(curtab), curtab->tabletok);
1791                                 addbrk(ps, 0, 0);
1792                         }
1793                 }
1794                 if(is->tabstk != nil)
1795                         is->tabstk = is->tabstk->next;
1796                 curtab->next = di->tables;
1797                 di->tables = curtab;
1798                 curtab = is->tabstk;
1799         }
1800         outerps = lastps(ps);
1801         ans = outerps->items->next;
1802         freeitem(outerps->items);
1803         // note: ans may be nil and di->kids not nil, if there's a frameset!
1804         outerps->items = newispacer(ISPnull);
1805         outerps->lastit = outerps->items;
1806         is->psstk = ps;
1807         if(ans != nil && di->hasscripts) {
1808                 // TODO evalscript(nil);
1809                 ;
1810         }
1811
1812 return_ans:
1813         if(dbgbuild) {
1814                 assert(validitems(ans));
1815                 if(ans == nil)
1816                         fprint(2, "getitems returning nil\n");
1817                 else
1818                         printitems(ans, "getitems returning:");
1819         }
1820         return ans;
1821 }
1822
1823 // Concatenate together maximal set of Data tokens, starting at toks[toki+1].
1824 // Lexer has ensured that there will either be a following non-data token or
1825 // we will be at eof.
1826 // Return emallocd trimmed concatenation, and update *ptoki to last used toki
1827 static Rune*
1828 getpcdata(Token* toks, int tokslen, int* ptoki)
1829 {
1830         Rune*   ans;
1831         Rune*   p;
1832         Rune*   trimans;
1833         int     anslen;
1834         int     trimanslen;
1835         int     toki;
1836         Token*  tok;
1837
1838         ans = nil;
1839         anslen = 0;
1840         // first find length of answer
1841         toki = (*ptoki) + 1;
1842         while(toki < tokslen) {
1843                 tok = &toks[toki];
1844                 if(tok->tag == Data) {
1845                         toki++;
1846                         anslen += _Strlen(tok->text);
1847                 }
1848                 else
1849                         break;
1850         }
1851         // now make up the initial answer
1852         if(anslen > 0) {
1853                 ans = _newstr(anslen);
1854                 p = ans;
1855                 toki = (*ptoki) + 1;
1856                 while(toki < tokslen) {
1857                         tok = &toks[toki];
1858                         if(tok->tag == Data) {
1859                                 toki++;
1860                                 p = _Stradd(p, tok->text, _Strlen(tok->text));
1861                         }
1862                         else
1863                                 break;
1864                 }
1865                 *p = 0;
1866                 _trimwhite(ans, anslen, &trimans, &trimanslen);
1867                 if(trimanslen != anslen) {
1868                         p = ans;
1869                         ans = _Strndup(trimans, trimanslen);
1870                         free(p);
1871                 }
1872         }
1873         *ptoki = toki-1;
1874         return ans;
1875 }
1876
1877 // If still parsing head of curtab->cells list, finish it off
1878 // by transferring the items on the head of psstk to the cell.
1879 // Then pop the psstk and return the new psstk.
1880 static Pstate*
1881 finishcell(Table* curtab, Pstate* psstk)
1882 {
1883         Tablecell*      c;
1884         Pstate* psstknext;
1885
1886         c = curtab->cells;
1887         if(c != nil) {
1888                 if((c->flags&TFparsing)) {
1889                         psstknext = psstk->next;
1890                         if(psstknext == nil) {
1891                                 if(warn)
1892                                         fprint(2, "warning: parse state stack is wrong\n");
1893                         }
1894                         else {
1895                                 c->content = psstk->items->next;
1896                                 c->flags &= ~TFparsing;
1897                                 freepstate(psstk);
1898                                 psstk = psstknext;
1899                         }
1900                 }
1901         }
1902         return psstk;
1903 }
1904
1905 // Make a new Pstate for a cell, based on the old pstate, oldps.
1906 // Also, put the new ps on the head of the oldps stack.
1907 static Pstate*
1908 cell_pstate(Pstate* oldps, int ishead)
1909 {
1910         Pstate* ps;
1911         int     sty;
1912
1913         ps = newpstate(oldps);
1914         ps->skipwhite = 1;
1915         ps->curanchor = oldps->curanchor;
1916         copystack(&ps->fntstylestk, &oldps->fntstylestk);
1917         copystack(&ps->fntsizestk, &oldps->fntsizestk);
1918         ps->curfont = oldps->curfont;
1919         ps->curfg = oldps->curfg;
1920         ps->curbg = oldps->curbg;
1921         copystack(&ps->fgstk, &oldps->fgstk);
1922         ps->adjsize = oldps->adjsize;
1923         if(ishead) {
1924                 sty = ps->curfont%NumSize;
1925                 ps->curfont = FntB*NumSize + sty;
1926         }
1927         return ps;
1928 }
1929
1930 // Return a new Pstate with default starting state.
1931 // Use link to add it to head of a list, if any.
1932 static Pstate*
1933 newpstate(Pstate* link)
1934 {
1935         Pstate* ps;
1936
1937         ps = (Pstate*)emalloc(sizeof(Pstate));
1938         ps->curfont = DefFnt;
1939         ps->curfg = Black;
1940         ps->curbg.image = nil;
1941         ps->curbg.color = White;
1942         ps->curul = ULnone;
1943         ps->curjust = ALleft;
1944         ps->curstate = IFwrap;
1945         ps->items = newispacer(ISPnull);
1946         ps->lastit = ps->items;
1947         ps->prelastit = nil;
1948         ps->next = link;
1949         return ps;
1950 }
1951
1952 // Return last Pstate on psl list
1953 static Pstate*
1954 lastps(Pstate* psl)
1955 {
1956         assert(psl != nil);
1957         while(psl->next != nil)
1958                 psl = psl->next;
1959         return psl;
1960 }
1961
1962 // Add it to end of ps item chain, adding in current state from ps.
1963 // Also, if tok is not nil, scan it for generic attributes and assign
1964 // the genattr field of the item accordingly.
1965 static void
1966 additem(Pstate* ps, Item* it, Token* tok)
1967 {
1968         int     aid;
1969         int     any;
1970         Rune*   i;
1971         Rune*   c;
1972         Rune*   s;
1973         Rune*   t;
1974         Attr*   a;
1975         SEvent* e;
1976
1977         if(ps->skipping) {
1978                 if(warn)
1979                         fprint(2, "warning: skipping item: %I\n", it);
1980                 return;
1981         }
1982         it->anchorid = ps->curanchor;
1983         it->state |= ps->curstate;
1984         if(tok != nil) {
1985                 any = 0;
1986                 i = nil;
1987                 c = nil;
1988                 s = nil;
1989                 t = nil;
1990                 e = nil;
1991                 for(a = tok->attr; a != nil; a = a->next) {
1992                         aid = a->attid;
1993                         if(!attrinfo[aid])
1994                                 continue;
1995                         switch(aid) {
1996                         case Aid:
1997                                 i = a->value;
1998                                 break;
1999
2000                         case Aclass:
2001                                 c = a->value;
2002                                 break;
2003
2004                         case Astyle:
2005                                 s = a->value;
2006                                 break;
2007
2008                         case Atitle:
2009                                 t = a->value;
2010                                 break;
2011
2012                         default:
2013                                 assert(aid >= Aonblur && aid <= Aonunload);
2014                                 e = newscriptevent(scriptev[a->attid], a->value, e);
2015                                 break;
2016                         }
2017                         a->value = nil;
2018                         any = 1;
2019                 }
2020                 if(any)
2021                         it->genattr = newgenattr(i, c, s, t, e);
2022         }
2023         ps->curstate &= ~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright);
2024         ps->prelastit = ps->lastit;
2025         ps->lastit->next = it;
2026         ps->lastit = it;
2027 }
2028
2029 // Make a text item out of s,
2030 // using current font, foreground, vertical offset and underline state.
2031 static Item*
2032 textit(Pstate* ps, Rune* s)
2033 {
2034         assert(s != nil);
2035         return newitext(s, ps->curfont, ps->curfg, ps->curvoff + Voffbias, ps->curul);
2036 }
2037
2038 // Add text item or items for s, paying attention to
2039 // current font, foreground, baseline offset, underline state,
2040 // and literal mode.  Unless we're in literal mode, compress
2041 // whitespace to single blank, and, if curstate has a break,
2042 // trim any leading whitespace.  Whether in literal mode or not,
2043 // turn nonbreaking spaces into spacer items with IFnobrk set.
2044 //
2045 // In literal mode, break up s at newlines and add breaks instead.
2046 // Also replace tabs appropriate number of spaces.
2047 // In nonliteral mode, break up the items every 100 or so characters
2048 // just to make the layout algorithm not go quadratic.
2049 //
2050 // addtext assumes ownership of s.
2051 static void
2052 addtext(Pstate* ps, Rune* s)
2053 {
2054         int     n;
2055         int     i;
2056         int     j;
2057         int     k;
2058         int     col;
2059         int     c;
2060         int     nsp;
2061         Item*   it;
2062         Rune*   ss;
2063         Rune*   p;
2064         Rune    buf[SMALLBUFSIZE];
2065
2066         assert(s != nil);
2067         n = runestrlen(s);
2068         i = 0;
2069         j = 0;
2070         if(ps->literal) {
2071                 col = 0;
2072                 while(i < n) {
2073                         if(s[i] == '\n') {
2074                                 if(i > j) {
2075                                         // trim trailing blanks from line
2076                                         for(k = i; k > j; k--)
2077                                                 if(s[k - 1] != ' ')
2078                                                         break;
2079                                         if(k > j)
2080                                                 additem(ps, textit(ps, _Strndup(s+j, k-j)), nil);
2081                                 }
2082                                 addlinebrk(ps, 0);
2083                                 j = i + 1;
2084                                 col = 0;
2085                         }
2086                         else {
2087                                 if(s[i] == '\t') {
2088                                         col += i - j;
2089                                         nsp = 8 - (col%8);
2090                                         // make ss = s[j:i] + nsp spaces
2091                                         ss = _newstr(i-j+nsp);
2092                                         p = _Stradd(ss, s+j, i-j);
2093                                         p = _Stradd(p, L"        ", nsp);
2094                                         *p = 0;
2095                                         additem(ps, textit(ps, ss), nil);
2096                                         col += nsp;
2097                                         j = i + 1;
2098                                 }
2099                                 else if(s[i] == NBSP) {
2100                                         if(i > j)
2101                                                 additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
2102                                         addnbsp(ps);
2103                                         col += (i - j) + 1;
2104                                         j = i + 1;
2105                                 }
2106                         }
2107                         i++;
2108                 }
2109                 if(i > j) {
2110                         if(j == 0 && i == n) {
2111                                 // just transfer s over
2112                                 additem(ps, textit(ps, s), nil);
2113                         }
2114                         else {
2115                                 additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
2116                                 free(s);
2117                         }
2118                 }
2119         }
2120         else {  // not literal mode
2121                 if((ps->curstate&IFbrk) || ps->lastit == ps->items)
2122                         while(i < n) {
2123                                 c = s[i];
2124                                 if(c >= 256 || !isspace(c))
2125                                         break;
2126                                 i++;
2127                         }
2128                 p = buf;
2129                 for(j = i; i < n; i++) {
2130                         assert(p+i-j < buf+SMALLBUFSIZE-1);
2131                         c = s[i];
2132                         if(c == NBSP) {
2133                                 if(i > j)
2134                                         p = _Stradd(p, s+j, i-j);
2135                                 if(p > buf)
2136                                         additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
2137                                 p = buf;
2138                                 addnbsp(ps);
2139                                 j = i + 1;
2140                                 continue;
2141                         }
2142                         if(c < 256 && isspace(c)) {
2143                                 if(i > j)
2144                                         p = _Stradd(p, s+j, i-j);
2145                                 *p++ = ' ';
2146                                 while(i < n - 1) {
2147                                         c = s[i + 1];
2148                                         if(c >= 256 || !isspace(c))
2149                                                 break;
2150                                         i++;
2151                                 }
2152                                 j = i + 1;
2153                         }
2154                         if(i - j >= 100) {
2155                                 p = _Stradd(p, s+j, i+1-j);
2156                                 j = i + 1;
2157                         }
2158                         if(p-buf >= 100) {
2159                                 additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
2160                                 p = buf;
2161                         }
2162                 }
2163                 if(i > j && j < n) {
2164                         assert(p+i-j < buf+SMALLBUFSIZE-1);
2165                         p = _Stradd(p, s+j, i-j);
2166                 }
2167                 // don't add a space if previous item ended in a space
2168                 if(p-buf == 1 && buf[0] == ' ' && ps->lastit != nil) {
2169                         it = ps->lastit;
2170                         if(it->tag == Itexttag) {
2171                                 ss = ((Itext*)it)->s;
2172                                 k = _Strlen(ss);
2173                                 if(k > 0 && ss[k] == ' ')
2174                                         p = buf;
2175                         }
2176                 }
2177                 if(p > buf)
2178                         additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
2179                 free(s);
2180         }
2181 }
2182
2183 // Add a break to ps->curstate, with extra space if sp is true.
2184 // If there was a previous break, combine this one's parameters
2185 // with that to make the amt be the max of the two and the clr
2186 // be the most general. (amt will be 0 or 1)
2187 // Also, if the immediately preceding item was a text item,
2188 // trim any whitespace from the end of it, if not in literal mode.
2189 // Finally, if this is at the very beginning of the item list
2190 // (the only thing there is a null spacer), then don't add the space.
2191 static void
2192 addbrk(Pstate* ps, int sp, int clr)
2193 {
2194         int     state;
2195         Rune*   l;
2196         int             nl;
2197         Rune*   r;
2198         int             nr;
2199         Itext*  t;
2200         Rune*   s;
2201
2202         state = ps->curstate;
2203         clr = clr|(state&(IFcleft|IFcright));
2204         if(sp && !(ps->lastit == ps->items))
2205                 sp = IFbrksp;
2206         else
2207                 sp = 0;
2208         ps->curstate = IFbrk|sp|(state&~(IFcleft|IFcright))|clr;
2209         if(ps->lastit != ps->items) {
2210                 if(!ps->literal && ps->lastit->tag == Itexttag) {
2211                         t = (Itext*)ps->lastit;
2212                         _splitr(t->s, _Strlen(t->s), notwhitespace, &l, &nl, &r, &nr);
2213                         // try to avoid making empty items
2214                         // but not crucial f the occasional one gets through
2215                         if(nl == 0 && ps->prelastit != nil) {
2216                                 ps->lastit = ps->prelastit;
2217                                 ps->lastit->next = nil;
2218                                 ps->prelastit = nil;
2219                         }
2220                         else {
2221                                 s = t->s;
2222                                 if(nl == 0) {
2223                                         // need a non-nil pointer to empty string
2224                                         // (_Strdup(L"") returns nil)
2225                                         t->s = emalloc(sizeof(Rune));
2226                                         t->s[0] = 0;
2227                                 }
2228                                 else
2229                                         t->s = _Strndup(l, nl);
2230                                 if(s)
2231                                         free(s);
2232                         }
2233                 }
2234         }
2235 }
2236
2237 // Add break due to a <br> or a newline within a preformatted section.
2238 // We add a null item first, with current font's height and ascent, to make
2239 // sure that the current line takes up at least that amount of vertical space.
2240 // This ensures that <br>s on empty lines cause blank lines, and that
2241 // multiple <br>s in a row give multiple blank lines.
2242 // However don't add the spacer if the previous item was something that
2243 // takes up space itself.
2244 static void
2245 addlinebrk(Pstate* ps, int clr)
2246 {
2247         int     obrkstate;
2248         int     b;
2249
2250         // don't want break before our null item unless the previous item
2251         // was also a null item for the purposes of line breaking
2252         obrkstate = ps->curstate&(IFbrk|IFbrksp);
2253         b = IFnobrk;
2254         if(ps->lastit != nil) {
2255                 if(ps->lastit->tag == Ispacertag) {
2256                         if(((Ispacer*)ps->lastit)->spkind == ISPvline)
2257                                 b = IFbrk;
2258                 }
2259         }
2260         ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|b;
2261         additem(ps, newispacer(ISPvline), nil);
2262         ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|obrkstate;
2263         addbrk(ps, 0, clr);
2264 }
2265
2266 // Add a nonbreakable space
2267 static void
2268 addnbsp(Pstate* ps)
2269 {
2270         // if nbsp comes right where a break was specified,
2271         // do the break anyway (nbsp is being used to generate undiscardable
2272         // space rather than to prevent a break)
2273         if((ps->curstate&IFbrk) == 0)
2274                 ps->curstate |= IFnobrk;
2275         additem(ps, newispacer(ISPhspace), nil);
2276         // but definitely no break on next item
2277         ps->curstate |= IFnobrk;
2278 }
2279
2280 // Change hang in ps.curstate by delta.
2281 // The amount is in 1/10ths of tabs, and is the amount that
2282 // the current contiguous set of items with a hang value set
2283 // is to be shifted left from its normal (indented) place.
2284 static void
2285 changehang(Pstate* ps, int delta)
2286 {
2287         int     amt;
2288
2289         amt = (ps->curstate&IFhangmask) + delta;
2290         if(amt < 0) {
2291                 if(warn)
2292                         fprint(2, "warning: hang went negative\n");
2293                 amt = 0;
2294         }
2295         ps->curstate = (ps->curstate&~IFhangmask)|amt;
2296 }
2297
2298 // Change indent in ps.curstate by delta.
2299 static void
2300 changeindent(Pstate* ps, int delta)
2301 {
2302         int     amt;
2303
2304         amt = ((ps->curstate&IFindentmask) >> IFindentshift) + delta;
2305         if(amt < 0) {
2306                 if(warn)
2307                         fprint(2, "warning: indent went negative\n");
2308                 amt = 0;
2309         }
2310         ps->curstate = (ps->curstate&~IFindentmask)|(amt << IFindentshift);
2311 }
2312
2313 // Push val on top of stack, and also return value pushed
2314 static int
2315 push(Stack* stk, int val)
2316 {
2317         if(stk->n == Nestmax) {
2318                 if(warn)
2319                         fprint(2, "warning: build stack overflow\n");
2320         }
2321         else
2322                 stk->slots[stk->n++] = val;
2323         return val;
2324 }
2325
2326 // Pop top of stack
2327 static void
2328 pop(Stack* stk)
2329 {
2330         if(stk->n > 0)
2331                 --stk->n;
2332 }
2333
2334 //Return top of stack, using dflt if stack is empty
2335 static int
2336 top(Stack* stk, int dflt)
2337 {
2338         if(stk->n == 0)
2339                 return dflt;
2340         return stk->slots[stk->n-1];
2341 }
2342
2343 // pop, then return new top, with dflt if empty
2344 static int
2345 popretnewtop(Stack* stk, int dflt)
2346 {
2347         if(stk->n == 0)
2348                 return dflt;
2349         stk->n--;
2350         if(stk->n == 0)
2351                 return dflt;
2352         return stk->slots[stk->n-1];
2353 }
2354
2355 // Copy fromstk entries into tostk
2356 static void
2357 copystack(Stack* tostk, Stack* fromstk)
2358 {
2359         int n;
2360
2361         n = fromstk->n;
2362         tostk->n = n;
2363         memmove(tostk->slots, fromstk->slots, n*sizeof(int));
2364 }
2365
2366 static void
2367 popfontstyle(Pstate* ps)
2368 {
2369         pop(&ps->fntstylestk);
2370         setcurfont(ps);
2371 }
2372
2373 static void
2374 pushfontstyle(Pstate* ps, int sty)
2375 {
2376         push(&ps->fntstylestk, sty);
2377         setcurfont(ps);
2378 }
2379
2380 static void
2381 popfontsize(Pstate* ps)
2382 {
2383         pop(&ps->fntsizestk);
2384         setcurfont(ps);
2385 }
2386
2387 static void
2388 pushfontsize(Pstate* ps, int sz)
2389 {
2390         push(&ps->fntsizestk, sz);
2391         setcurfont(ps);
2392 }
2393
2394 static void
2395 setcurfont(Pstate* ps)
2396 {
2397         int     sty;
2398         int     sz;
2399
2400         sty = top(&ps->fntstylestk, FntR);
2401         sz = top(&ps->fntsizestk, Normal);
2402         if(sz < Tiny)
2403                 sz = Tiny;
2404         if(sz > Verylarge)
2405                 sz = Verylarge;
2406         ps->curfont = sty*NumSize + sz;
2407 }
2408
2409 static void
2410 popjust(Pstate* ps)
2411 {
2412         pop(&ps->juststk);
2413         setcurjust(ps);
2414 }
2415
2416 static void
2417 pushjust(Pstate* ps, int j)
2418 {
2419         push(&ps->juststk, j);
2420         setcurjust(ps);
2421 }
2422
2423 static void
2424 setcurjust(Pstate* ps)
2425 {
2426         int     j;
2427         int     state;
2428
2429         j = top(&ps->juststk, ALleft);
2430         if(j != ps->curjust) {
2431                 ps->curjust = j;
2432                 state = ps->curstate;
2433                 state &= ~(IFrjust|IFcjust);
2434                 if(j == ALcenter)
2435                         state |= IFcjust;
2436                 else if(j == ALright)
2437                         state |= IFrjust;
2438                 ps->curstate = state;
2439         }
2440 }
2441
2442 // Do final rearrangement after table parsing is finished
2443 // and assign cells to grid points
2444 static void
2445 finish_table(Table* t)
2446 {
2447         int     ncol;
2448         int     nrow;
2449         int     r;
2450         Tablerow*       rl;
2451         Tablecell*      cl;
2452         int*    rowspancnt;
2453         Tablecell**     rowspancell;
2454         int     ri;
2455         int     ci;
2456         Tablecell*      c;
2457         Tablecell*      cnext;
2458         Tablerow*       row;
2459         Tablerow*       rownext;
2460         int     rcols;
2461         int     newncol;
2462         int     k;
2463         int     j;
2464         int     cspan;
2465         int     rspan;
2466         int     i;
2467
2468         rl = t->rows;
2469         t->nrow = nrow = _listlen((List*)rl);
2470         t->rows = (Tablerow*)emalloc(nrow * sizeof(Tablerow));
2471         ncol = 0;
2472         r = nrow - 1;
2473         for(row = rl; row != nil; row = rownext) {
2474                 // copy the data from the allocated Tablerow into the array slot
2475                 t->rows[r] = *row;
2476                 rownext = row->next;
2477                 row = &t->rows[r];
2478                 r--;
2479                 rcols = 0;
2480                 c = row->cells;
2481
2482                 // If rowspan is > 1 but this is the last row,
2483                 // reset the rowspan
2484                 if(c != nil && c->rowspan > 1 && r == nrow-2)
2485                                 c->rowspan = 1;
2486
2487                 // reverse row->cells list (along nextinrow pointers)
2488                 row->cells = nil;
2489                 while(c != nil) {
2490                         cnext = c->nextinrow;
2491                         c->nextinrow = row->cells;
2492                         row->cells = c;
2493                         rcols += c->colspan;
2494                         c = cnext;
2495                 }
2496                 if(rcols > ncol)
2497                         ncol = rcols;
2498         }
2499         t->ncol = ncol;
2500         t->cols = (Tablecol*)emalloc(ncol * sizeof(Tablecol));
2501
2502         // Reverse cells just so they are drawn in source order.
2503         // Also, trim their contents so they don't end in whitespace.
2504         t->cells = (Tablecell*)_revlist((List*)t->cells);
2505         for(c = t->cells; c != nil; c= c->next)
2506                 trim_cell(c);
2507         t->grid = (Tablecell***)emalloc(nrow * sizeof(Tablecell**));
2508         for(i = 0; i < nrow; i++)
2509                 t->grid[i] = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
2510
2511         // The following arrays keep track of cells that are spanning
2512         // multiple rows;  rowspancnt[i] is the number of rows left
2513         // to be spanned in column i.
2514         // When done, cell's (row,col) is upper left grid point.
2515         rowspancnt = (int*)emalloc(ncol * sizeof(int));
2516         rowspancell = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
2517         for(ri = 0; ri < nrow; ri++) {
2518                 row = &t->rows[ri];
2519                 cl = row->cells;
2520                 ci = 0;
2521                 while(ci < ncol || cl != nil) {
2522                         if(ci < ncol && rowspancnt[ci] > 0) {
2523                                 t->grid[ri][ci] = rowspancell[ci];
2524                                 rowspancnt[ci]--;
2525                                 ci++;
2526                         }
2527                         else {
2528                                 if(cl == nil) {
2529                                         ci++;
2530                                         continue;
2531                                 }
2532                                 c = cl;
2533                                 cl = cl->nextinrow;
2534                                 cspan = c->colspan;
2535                                 rspan = c->rowspan;
2536                                 if(ci + cspan > ncol) {
2537                                         // because of row spanning, we calculated
2538                                         // ncol incorrectly; adjust it
2539                                         newncol = ci + cspan;
2540                                         t->cols = (Tablecol*)erealloc(t->cols, newncol * sizeof(Tablecol));
2541                                         rowspancnt = (int*)erealloc(rowspancnt, newncol * sizeof(int));
2542                                         rowspancell = (Tablecell**)erealloc(rowspancell, newncol * sizeof(Tablecell*));
2543                                         k = newncol-ncol;
2544                                         memset(t->cols+ncol, 0, k*sizeof(Tablecol));
2545                                         memset(rowspancnt+ncol, 0, k*sizeof(int));
2546                                         memset(rowspancell+ncol, 0, k*sizeof(Tablecell*));
2547                                         for(j = 0; j < nrow; j++) {
2548                                                 t->grid[j] = (Tablecell**)erealloc(t->grid[j], newncol * sizeof(Tablecell*));
2549                                                 memset(t->grid[j], 0, k*sizeof(Tablecell*));
2550                                         }
2551                                         t->ncol = ncol = newncol;
2552                                 }
2553                                 c->row = ri;
2554                                 c->col = ci;
2555                                 for(i = 0; i < cspan; i++) {
2556                                         t->grid[ri][ci] = c;
2557                                         if(rspan > 1) {
2558                                                 rowspancnt[ci] = rspan - 1;
2559                                                 rowspancell[ci] = c;
2560                                         }
2561                                         ci++;
2562                                 }
2563                         }
2564                 }
2565         }
2566         free(rowspancnt);
2567         free(rowspancell);
2568 }
2569
2570 // Remove tail of cell content until it isn't whitespace.
2571 static void
2572 trim_cell(Tablecell* c)
2573 {
2574         int     dropping;
2575         Rune*   s;
2576         Rune*   x;
2577         Rune*   y;
2578         int             nx;
2579         int             ny;
2580         Item*   p;
2581         Itext*  q;
2582         Item*   pprev;
2583
2584         dropping = 1;
2585         while(c->content != nil && dropping) {
2586                 p = c->content;
2587                 pprev = nil;
2588                 while(p->next != nil) {
2589                         pprev = p;
2590                         p = p->next;
2591                 }
2592                 dropping = 0;
2593                 if(!(p->state&IFnobrk)) {
2594                         if(p->tag == Itexttag) {
2595                                 q = (Itext*)p;
2596                                 s = q->s;
2597                                 _splitr(s, _Strlen(s), notwhitespace, &x, &nx, &y, &ny);
2598                                 if(nx != 0 && ny != 0) {
2599                                         q->s = _Strndup(x, nx);
2600                                         free(s);
2601                                 }
2602                                 break;
2603                         }
2604                 }
2605                 if(dropping) {
2606                         if(pprev == nil)
2607                                 c->content = nil;
2608                         else
2609                                 pprev->next = nil;
2610                         freeitem(p);
2611                 }
2612         }
2613 }
2614
2615 // Caller must free answer (eventually).
2616 static Rune*
2617 listmark(uchar ty, int n)
2618 {
2619         Rune*   s;
2620         Rune*   t;
2621         int     n2;
2622         int     i;
2623
2624         s = nil;
2625         switch(ty) {
2626         case LTdisc:
2627         case LTsquare:
2628         case LTcircle:
2629                 s = _newstr(1);
2630                 s[0] = (ty == LTdisc)? 0x2022           // bullet
2631                         : ((ty == LTsquare)? 0x220e     // filled square
2632                             : 0x2218);                          // degree
2633                 s[1] = 0;
2634                 break;
2635
2636         case LT1:
2637                 s = runesmprint("%d.", n);
2638                 break;
2639
2640         case LTa:
2641         case LTA:
2642                 n--;
2643                 i = 0;
2644                 if(n < 0)
2645                         n = 0;
2646                 s = _newstr((n <= 25)? 2 : 3);
2647                 if(n > 25) {
2648                         n2 = n%26;
2649                         n /= 26;
2650                         if(n2 > 25)
2651                                 n2 = 25;
2652                         s[i++] = n2 + (ty == LTa)? 'a' : 'A';
2653                 }
2654                 s[i++] = n + (ty == LTa)? 'a' : 'A';
2655                 s[i++] = '.';
2656                 s[i] = 0;
2657                 break;
2658
2659         case LTi:
2660         case LTI:
2661                 if(n >= NROMAN) {
2662                         if(warn)
2663                                 fprint(2, "warning: unimplemented roman number > %d\n", NROMAN);
2664                         n = NROMAN;
2665                 }
2666                 t = roman[n - 1];
2667                 n2 = _Strlen(t);
2668                 s = _newstr(n2+1);
2669                 for(i = 0; i < n2; i++)
2670                         s[i] = (ty == LTi)? tolower(t[i]) : t[i];
2671                 s[i++] = '.';
2672                 s[i] = 0;
2673                 break;
2674         }
2675         return s;
2676 }
2677
2678 // Find map with given name in di.maps.
2679 // If not there, add one, copying name.
2680 // Ownership of map remains with di->maps list.
2681 static Map*
2682 getmap(Docinfo* di, Rune* name)
2683 {
2684         Map*    m;
2685
2686         for(m = di->maps; m != nil; m = m->next) {
2687                 if(!_Strcmp(name, m->name))
2688                         return m;
2689         }
2690         m = (Map*)emalloc(sizeof(Map));
2691         m->name = _Strdup(name);
2692         m->areas = nil;
2693         m->next = di->maps;
2694         di->maps = m;
2695         return m;
2696 }
2697
2698 // Transfers ownership of href to Area
2699 static Area*
2700 newarea(int shape, Rune* href, int target, Area* link)
2701 {
2702         Area* a;
2703
2704         a = (Area*)emalloc(sizeof(Area));
2705         a->shape = shape;
2706         a->href = href;
2707         a->target = target;
2708         a->next = link;
2709         return a;
2710 }
2711
2712 // Return string value associated with attid in tok, nil if none.
2713 // Caller must free the result (eventually).
2714 static Rune*
2715 aval(Token* tok, int attid)
2716 {
2717         Rune*   ans;
2718
2719         _tokaval(tok, attid, &ans, 1);  // transfers string ownership from token to ans
2720         return ans;
2721 }
2722
2723 // Like aval, but use dflt if there was no such attribute in tok.
2724 // Caller must free the result (eventually).
2725 static Rune*
2726 astrval(Token* tok, int attid, Rune* dflt)
2727 {
2728         Rune*   ans;
2729
2730         if(_tokaval(tok, attid, &ans, 1))
2731                 return ans;     // transfers string ownership from token to ans
2732         else
2733                 return _Strdup(dflt);
2734 }
2735
2736 // Here we're supposed to convert to an int,
2737 // and have a default when not found
2738 static int
2739 aintval(Token* tok, int attid, int dflt)
2740 {
2741         Rune*   ans;
2742
2743         if(!_tokaval(tok, attid, &ans, 0) || ans == nil)
2744                 return dflt;
2745         else
2746                 return toint(ans);
2747 }
2748
2749 // Like aintval, but result should be >= 0
2750 static int
2751 auintval(Token* tok, int attid, int dflt)
2752 {
2753         Rune* ans;
2754         int v;
2755
2756         if(!_tokaval(tok, attid, &ans, 0) || ans == nil)
2757                 return dflt;
2758         else {
2759                 v = toint(ans);
2760                 return v >= 0? v : 0;
2761         }
2762 }
2763
2764 // int conversion, but with possible error check (if warning)
2765 static int
2766 toint(Rune* s)
2767 {
2768         int ans;
2769         Rune* eptr;
2770
2771         ans = _Strtol(s, &eptr, 10);
2772         if(warn) {
2773                 if(*eptr != 0) {
2774                         eptr = _Strclass(eptr, notwhitespace);
2775                         if(eptr != nil)
2776                                 fprint(2, "warning: expected integer, got %S\n", s);
2777                 }
2778         }
2779         return ans;
2780 }
2781
2782 // Attribute value when need a table to convert strings to ints
2783 static int
2784 atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt)
2785 {
2786         Rune*   aval;
2787         int     ans;
2788
2789         ans = dflt;
2790         if(_tokaval(tok, attid, &aval, 0)) {
2791                 if(!_lookup(tab, ntab, aval, _Strlen(aval), &ans)) {
2792                         ans = dflt;
2793                         if(warn)
2794                                 fprint(2, "warning: name not found in table lookup: %S\n", aval);
2795                 }
2796         }
2797         return ans;
2798 }
2799
2800 // Attribute value when supposed to be a color
2801 static int
2802 acolorval(Token* tok, int attid, int dflt)
2803 {
2804         Rune*   aval;
2805         int     ans;
2806
2807         ans = dflt;
2808         if(_tokaval(tok, attid, &aval, 0))
2809                 ans = color(aval, dflt);
2810         return ans;
2811 }
2812
2813 // Attribute value when supposed to be a target frame name
2814 static int
2815 atargval(Token* tok, int dflt)
2816 {
2817         int     ans;
2818         Rune*   aval;
2819
2820         ans = dflt;
2821         if(_tokaval(tok, Atarget, &aval, 0)){
2822                 ans = targetid(aval);
2823         }
2824         return ans;
2825 }
2826
2827 // special for list types, where "i" and "I" are different,
2828 // but "square" and "SQUARE" are the same
2829 static int
2830 listtyval(Token* tok, int dflt)
2831 {
2832         Rune*   aval;
2833         int     ans;
2834         int     n;
2835
2836         ans = dflt;
2837         if(_tokaval(tok, Atype, &aval, 0)) {
2838                 n = _Strlen(aval);
2839                 if(n == 1) {
2840                         switch(aval[0]) {
2841                         case '1':
2842                                 ans = LT1;
2843                                 break;
2844                         case 'A':
2845                                 ans = LTA;
2846                                 break;
2847                         case 'I':
2848                                 ans = LTI;
2849                                 break;
2850                         case 'a':
2851                                 ans = LTa;
2852                                 break;
2853                         case 'i':
2854                                 ans = LTi;
2855                         default:
2856                                 if(warn)
2857                                         fprint(2, "warning: unknown list element type %c\n", aval[0]);
2858                         }
2859                 }
2860                 else {
2861                         if(!_Strncmpci(aval, n, L"circle"))
2862                                 ans = LTcircle;
2863                         else if(!_Strncmpci(aval, n, L"disc"))
2864                                 ans = LTdisc;
2865                         else if(!_Strncmpci(aval, n, L"square"))
2866                                 ans = LTsquare;
2867                         else {
2868                                 if(warn)
2869                                         fprint(2, "warning: unknown list element type %S\n", aval);
2870                         }
2871                 }
2872         }
2873         return ans;
2874 }
2875
2876 // Attribute value when value is a URL, possibly relative to base.
2877 // FOR NOW: leave the url relative.
2878 // Caller must free the result (eventually).
2879 static Rune*
2880 aurlval(Token* tok, int attid, Rune* dflt, Rune* base)
2881 {
2882         Rune*   ans;
2883         Rune*   url;
2884
2885         USED(base);
2886         ans = nil;
2887         if(_tokaval(tok, attid, &url, 0) && url != nil)
2888                 ans = removeallwhite(url);
2889         if(ans == nil)
2890                 ans = _Strdup(dflt);
2891         return ans;
2892 }
2893
2894 // Return copy of s but with all whitespace (even internal) removed.
2895 // This fixes some buggy URL specification strings.
2896 static Rune*
2897 removeallwhite(Rune* s)
2898 {
2899         int     j;
2900         int     n;
2901         int     i;
2902         int     c;
2903         Rune*   ans;
2904
2905         j = 0;
2906         n = _Strlen(s);
2907         for(i = 0; i < n; i++) {
2908                 c = s[i];
2909                 if(c >= 256 || !isspace(c))
2910                         j++;
2911         }
2912         if(j < n) {
2913                 ans = _newstr(j);
2914                 j = 0;
2915                 for(i = 0; i < n; i++) {
2916                         c = s[i];
2917                         if(c >= 256 || !isspace(c))
2918                                 ans[j++] = c;
2919                 }
2920                 ans[j] = 0;
2921         }
2922         else
2923                 ans = _Strdup(s);
2924         return ans;
2925 }
2926
2927 // Attribute value when mere presence of attr implies value of 1,
2928 // but if there is an integer there, return it as the value.
2929 static int
2930 aflagval(Token* tok, int attid)
2931 {
2932         int     val;
2933         Rune*   sval;
2934
2935         val = 0;
2936         if(_tokaval(tok, attid, &sval, 0)) {
2937                 val = 1;
2938                 if(sval != nil)
2939                         val = toint(sval);
2940         }
2941         return val;
2942 }
2943
2944 static Align
2945 makealign(int halign, int valign)
2946 {
2947         Align   al;
2948
2949         al.halign = halign;
2950         al.valign = valign;
2951         return al;
2952 }
2953
2954 // Make an Align (two alignments, horizontal and vertical)
2955 static Align
2956 aalign(Token* tok)
2957 {
2958         return makealign(
2959                 atabval(tok, Aalign, align_tab, NALIGNTAB, ALnone),
2960                 atabval(tok, Avalign, align_tab, NALIGNTAB, ALnone));
2961 }
2962
2963 // Make a Dimen, based on value of attid attr
2964 static Dimen
2965 adimen(Token* tok, int attid)
2966 {
2967         Rune*   wd;
2968
2969         if(_tokaval(tok, attid, &wd, 0))
2970                 return parsedim(wd, _Strlen(wd));
2971         else
2972                 return makedimen(Dnone, 0);
2973 }
2974
2975 // Parse s[0:n] as num[.[num]][unit][%|*]
2976 static Dimen
2977 parsedim(Rune* s, int ns)
2978 {
2979         int     kind;
2980         int     spec;
2981         Rune*   l;
2982         int     nl;
2983         Rune*   r;
2984         int     nr;
2985         int     mul;
2986         int     i;
2987         Rune*   f;
2988         int     nf;
2989         int     Tkdpi;
2990         Rune*   units;
2991
2992         kind = Dnone;
2993         spec = 0;
2994         _splitl(s, ns, L"^0-9", &l, &nl, &r, &nr);
2995         if(nl != 0) {
2996                 spec = 1000*_Strtol(l, nil, 10);
2997                 if(nr > 0 && r[0] == '.') {
2998                         _splitl(r+1, nr-1, L"^0-9", &f, &nf, &r, &nr);
2999                         if(nf != 0) {
3000                                 mul = 100;
3001                                 for(i = 0; i < nf; i++) {
3002                                         spec = spec + mul*(f[i]-'0');
3003                                         mul = mul/10;
3004                                 }
3005                         }
3006                 }
3007                 kind = Dpixels;
3008                 if(nr != 0) {
3009                         if(nr >= 2) {
3010                                 Tkdpi = 100;
3011                                 units = r;
3012                                 r = r+2;
3013                                 nr -= 2;
3014                                 if(!_Strncmpci(units, 2, L"pt"))
3015                                         spec = (spec*Tkdpi)/72;
3016                                 else if(!_Strncmpci(units, 2, L"pi"))
3017                                         spec = (spec*12*Tkdpi)/72;
3018                                 else if(!_Strncmpci(units, 2, L"in"))
3019                                         spec = spec*Tkdpi;
3020                                 else if(!_Strncmpci(units, 2, L"cm"))
3021                                         spec = (spec*100*Tkdpi)/254;
3022                                 else if(!_Strncmpci(units, 2, L"mm"))
3023                                         spec = (spec*10*Tkdpi)/254;
3024                                 else if(!_Strncmpci(units, 2, L"em"))
3025                                         spec = spec*15;
3026                                 else {
3027                                         if(warn)
3028                                                 fprint(2, "warning: unknown units %C%Cs\n", units[0], units[1]);
3029                                 }
3030                         }
3031                         if(nr >= 1) {
3032                                 if(r[0] == '%')
3033                                         kind = Dpercent;
3034                                 else if(r[0] == '*')
3035                                         kind = Drelative;
3036                         }
3037                 }
3038                 spec = spec/1000;
3039         }
3040         else if(nr == 1 && r[0] == '*') {
3041                 spec = 1;
3042                 kind = Drelative;
3043         }
3044         return makedimen(kind, spec);
3045 }
3046
3047 static void
3048 setdimarray(Token* tok, int attid, Dimen** pans, int* panslen)
3049 {
3050         Rune*   s;
3051         Dimen*  d;
3052         int     k;
3053         int     nc;
3054         Rune* a[SMALLBUFSIZE];
3055         int     an[SMALLBUFSIZE];
3056
3057         if(_tokaval(tok, attid, &s, 0)) {
3058                 nc = _splitall(s, _Strlen(s), L", ", a, an, SMALLBUFSIZE);
3059                 if(nc > 0) {
3060                         d = (Dimen*)emalloc(nc * sizeof(Dimen));
3061                         for(k = 0; k < nc; k++) {
3062                                 d[k] = parsedim(a[k], an[k]);
3063                         }
3064                         *pans = d;
3065                         *panslen = nc;
3066                         return;
3067                 }
3068         }
3069         *pans = nil;
3070         *panslen = 0;
3071 }
3072
3073 static Background
3074 makebackground(Rune* imageurl, int color)
3075 {
3076         Background bg;
3077
3078         bg.image = imageurl;
3079         bg.color = color;
3080         return bg;
3081 }
3082
3083 static Item*
3084 newitext(Rune* s, int fnt, int fg, int voff, int ul)
3085 {
3086         Itext* t;
3087
3088         assert(s != nil);
3089         t = (Itext*)emalloc(sizeof(Itext));
3090         t->tag = Itexttag;
3091         t->s = s;
3092         t->fnt = fnt;
3093         t->fg = fg;
3094         t->voff = voff;
3095         t->ul = ul;
3096         return (Item*)t;
3097 }
3098
3099 static Item*
3100 newirule(int align, int size, int noshade, int color, Dimen wspec)
3101 {
3102         Irule* r;
3103
3104         r = (Irule*)emalloc(sizeof(Irule));
3105         r->tag = Iruletag;
3106         r->align = align;
3107         r->size = size;
3108         r->noshade = noshade;
3109         r->color = color;
3110         r->wspec = wspec;
3111         return (Item*)r;
3112 }
3113
3114 // Map is owned elsewhere.
3115 static Item*
3116 newiimage(Rune* src, Rune* altrep, int align, int width, int height,
3117                 int hspace, int vspace, int border, int ismap, Map* map)
3118 {
3119         Iimage* i;
3120         int     state;
3121
3122         state = 0;
3123         if(ismap)
3124                 state = IFsmap;
3125         i = (Iimage*)emalloc(sizeof(Iimage));
3126         i->tag = Iimagetag;
3127         i->state = state;
3128         i->imsrc = src;
3129         i->altrep = altrep;
3130         i->align = align;
3131         i->imwidth = width;
3132         i->imheight = height;
3133         i->hspace = hspace;
3134         i->vspace = vspace;
3135         i->border = border;
3136         i->map = map;
3137         i->ctlid = -1;
3138         return (Item*)i;
3139 }
3140
3141 static Item*
3142 newiformfield(Formfield* ff)
3143 {
3144         Iformfield* f;
3145
3146         f = (Iformfield*)emalloc(sizeof(Iformfield));
3147         f->tag = Iformfieldtag;
3148         f->formfield = ff;
3149         return (Item*)f;
3150 }
3151
3152 static Item*
3153 newitable(Table* tab)
3154 {
3155         Itable* t;
3156
3157         t = (Itable*)emalloc(sizeof(Itable));
3158         t->tag = Itabletag;
3159         t->table = tab;
3160         return (Item*)t;
3161 }
3162
3163 static Item*
3164 newifloat(Item* it, int side)
3165 {
3166         Ifloat* f;
3167
3168         f = (Ifloat*)emalloc(sizeof(Ifloat));
3169         f->tag = Ifloattag;
3170         f->state = IFwrap;
3171         f->item = it;
3172         f->side = side;
3173         return (Item*)f;
3174 }
3175
3176 static Item*
3177 newispacer(int spkind)
3178 {
3179         Ispacer* s;
3180
3181         s = (Ispacer*)emalloc(sizeof(Ispacer));
3182         s->tag = Ispacertag;
3183         s->spkind = spkind;
3184         return (Item*)s;
3185 }
3186
3187 // Free one item (caller must deal with next pointer)
3188 static void
3189 freeitem(Item* it)
3190 {
3191         Iimage* ii;
3192         Genattr* ga;
3193
3194         if(it == nil)
3195                 return;
3196
3197         switch(it->tag) {
3198         case Itexttag:
3199                 free(((Itext*)it)->s);
3200                 break;
3201         case Iimagetag:
3202                 ii = (Iimage*)it;
3203                 free(ii->imsrc);
3204                 free(ii->altrep);
3205                 break;
3206         case Iformfieldtag:
3207                 freeformfield(((Iformfield*)it)->formfield);
3208                 break;
3209         case Itabletag:
3210                 freetable(((Itable*)it)->table);
3211                 break;
3212         case Ifloattag:
3213                 freeitem(((Ifloat*)it)->item);
3214                 break;
3215         }
3216         ga = it->genattr;
3217         if(ga != nil) {
3218                 free(ga->id);
3219                 free(ga->class);
3220                 free(ga->style);
3221                 free(ga->title);
3222                 freescriptevents(ga->events);
3223         }
3224         free(it);
3225 }
3226
3227 // Free list of items chained through next pointer
3228 void
3229 freeitems(Item* ithead)
3230 {
3231         Item* it;
3232         Item* itnext;
3233
3234         it = ithead;
3235         while(it != nil) {
3236                 itnext = it->next;
3237                 freeitem(it);
3238                 it = itnext;
3239         }
3240 }
3241
3242 static void
3243 freeformfield(Formfield* ff)
3244 {
3245         Option* o;
3246         Option* onext;
3247
3248         if(ff == nil)
3249                 return;
3250
3251         free(ff->name);
3252         free(ff->value);
3253         for(o = ff->options; o != nil; o = onext) {
3254                 onext = o->next;
3255                 free(o->value);
3256                 free(o->display);
3257         }
3258         free(ff);
3259 }
3260
3261 static void
3262 freetable(Table* t)
3263 {
3264         int i;
3265         Tablecell* c;
3266         Tablecell* cnext;
3267
3268         if(t == nil)
3269                 return;
3270
3271         // We'll find all the unique cells via t->cells and next pointers.
3272         // (Other pointers to cells in the table are duplicates of these)
3273         for(c = t->cells; c != nil; c = cnext) {
3274                 cnext = c->next;
3275                 freeitems(c->content);
3276         }
3277         if(t->grid != nil) {
3278                 for(i = 0; i < t->nrow; i++)
3279                         free(t->grid[i]);
3280                 free(t->grid);
3281         }
3282         free(t->rows);
3283         free(t->cols);
3284         freeitems(t->caption);
3285         free(t);
3286 }
3287
3288 static void
3289 freeform(Form* f)
3290 {
3291         if(f == nil)
3292                 return;
3293
3294         free(f->name);
3295         free(f->action);
3296         // Form doesn't own its fields (Iformfield items do)
3297         free(f);
3298 }
3299
3300 static void
3301 freeforms(Form* fhead)
3302 {
3303         Form* f;
3304         Form* fnext;
3305
3306         for(f = fhead; f != nil; f = fnext) {
3307                 fnext = f->next;
3308                 freeform(f);
3309         }
3310 }
3311
3312 static void
3313 freeanchor(Anchor* a)
3314 {
3315         if(a == nil)
3316                 return;
3317
3318         free(a->name);
3319         free(a->href);
3320         free(a);
3321 }
3322
3323 static void
3324 freeanchors(Anchor* ahead)
3325 {
3326         Anchor* a;
3327         Anchor* anext;
3328
3329         for(a = ahead; a != nil; a = anext) {
3330                 anext = a->next;
3331                 freeanchor(a);
3332         }
3333 }
3334
3335 static void
3336 freedestanchor(DestAnchor* da)
3337 {
3338         if(da == nil)
3339                 return;
3340
3341         free(da->name);
3342         free(da);
3343 }
3344
3345 static void
3346 freedestanchors(DestAnchor* dahead)
3347 {
3348         DestAnchor* da;
3349         DestAnchor* danext;
3350
3351         for(da = dahead; da != nil; da = danext) {
3352                 danext = da->next;
3353                 freedestanchor(da);
3354         }
3355 }
3356
3357 static void
3358 freearea(Area* a)
3359 {
3360         if(a == nil)
3361                 return;
3362         free(a->href);
3363         free(a->coords);
3364 }
3365
3366 static void freekidinfos(Kidinfo* khead);
3367
3368 static void
3369 freekidinfo(Kidinfo* k)
3370 {
3371         if(k->isframeset) {
3372                 free(k->rows);
3373                 free(k->cols);
3374                 freekidinfos(k->kidinfos);
3375         }
3376         else {
3377                 free(k->src);
3378                 free(k->name);
3379         }
3380         free(k);
3381 }
3382
3383 static void
3384 freekidinfos(Kidinfo* khead)
3385 {
3386         Kidinfo* k;
3387         Kidinfo* knext;
3388
3389         for(k = khead; k != nil; k = knext) {
3390                 knext = k->next;
3391                 freekidinfo(k);
3392         }
3393 }
3394
3395 static void
3396 freemap(Map* m)
3397 {
3398         Area* a;
3399         Area* anext;
3400
3401         if(m == nil)
3402                 return;
3403
3404         free(m->name);
3405         for(a = m->areas; a != nil; a = anext) {
3406                 anext = a->next;
3407                 freearea(a);
3408         }
3409         free(m);
3410 }
3411
3412 static void
3413 freemaps(Map* mhead)
3414 {
3415         Map* m;
3416         Map* mnext;
3417
3418         for(m = mhead; m != nil; m = mnext) {
3419                 mnext = m->next;
3420                 freemap(m);
3421         }
3422 }
3423
3424 void
3425 freedocinfo(Docinfo* d)
3426 {
3427         if(d == nil)
3428                 return;
3429         free(d->src);
3430         free(d->base);
3431         freeitem((Item*)d->backgrounditem);
3432         free(d->refresh);
3433         freekidinfos(d->kidinfo);
3434         freeanchors(d->anchors);
3435         freedestanchors(d->dests);
3436         freeforms(d->forms);
3437         freemaps(d->maps);
3438         // tables, images, and formfields are freed when
3439         // the items pointing at them are freed
3440         free(d);
3441 }
3442
3443 // Currently, someone else owns all the memory
3444 // pointed to by things in a Pstate.
3445 static void
3446 freepstate(Pstate* p)
3447 {
3448         free(p);
3449 }
3450
3451 static void
3452 freepstatestack(Pstate* pshead)
3453 {
3454         Pstate* p;
3455         Pstate* pnext;
3456
3457         for(p = pshead; p != nil; p = pnext) {
3458                 pnext = p->next;
3459                 free(p);
3460         }
3461 }
3462
3463 static int
3464 Iconv(Fmt *f)
3465 {
3466         Item*   it;
3467         Itext*  t;
3468         Irule*  r;
3469         Iimage* i;
3470         Ifloat* fl;
3471         int     state;
3472         Formfield*      ff;
3473         Rune*   ty;
3474         Tablecell*      c;
3475         Table*  tab;
3476         char*   p;
3477         int     cl;
3478         int     hang;
3479         int     indent;
3480         int     bi;
3481         int     nbuf;
3482         char    buf[BIGBUFSIZE];
3483
3484         it = va_arg(f->args, Item*);
3485         bi = 0;
3486         nbuf = sizeof(buf);
3487         state = it->state;
3488         nbuf = nbuf-1;
3489         if(state&IFbrk) {
3490                 cl = state&(IFcleft|IFcright);
3491                 p = "";
3492                 if(cl) {
3493                         if(cl == (IFcleft|IFcright))
3494                                 p = " both";
3495                         else if(cl == IFcleft)
3496                                 p = " left";
3497                         else
3498                                 p = " right";
3499                 }
3500                 bi = snprint(buf, nbuf, "brk(%d%s)", (state&IFbrksp)? 1 : 0, p);
3501         }
3502         if(state&IFnobrk)
3503                 bi += snprint(buf+bi, nbuf-bi, " nobrk");
3504         if(!(state&IFwrap))
3505                 bi += snprint(buf+bi, nbuf-bi, " nowrap");
3506         if(state&IFrjust)
3507                 bi += snprint(buf+bi, nbuf-bi, " rjust");
3508         if(state&IFcjust)
3509                 bi += snprint(buf+bi, nbuf-bi, " cjust");
3510         if(state&IFsmap)
3511                 bi += snprint(buf+bi, nbuf-bi, " smap");
3512         indent = (state&IFindentmask) >> IFindentshift;
3513         if(indent > 0)
3514                 bi += snprint(buf+bi, nbuf-bi, " indent=%d", indent);
3515         hang = state&IFhangmask;
3516         if(hang > 0)
3517                 bi += snprint(buf+bi, nbuf-bi, " hang=%d", hang);
3518
3519         switch(it->tag) {
3520         case Itexttag:
3521                 t = (Itext*)it;
3522                 bi += snprint(buf+bi, nbuf-bi, " Text '%S', fnt=%d, fg=%x", t->s, t->fnt, t->fg);
3523                 break;
3524
3525         case Iruletag:
3526                 r = (Irule*)it;
3527                 bi += snprint(buf+bi, nbuf-bi, "Rule size=%d, al=%S, wspec=", r->size, stringalign(r->align));
3528                 bi += dimprint(buf+bi, nbuf-bi, r->wspec);
3529                 break;
3530
3531         case Iimagetag:
3532                 i = (Iimage*)it;
3533                 bi += snprint(buf+bi, nbuf-bi,
3534                         "Image src=%S, alt=%S, al=%S, w=%d, h=%d hsp=%d, vsp=%d, bd=%d, map=%S",
3535                         i->imsrc, i->altrep? i->altrep : L"", stringalign(i->align), i->imwidth, i->imheight,
3536                         i->hspace, i->vspace, i->border, i->map? i->map->name : L"");
3537                 break;
3538
3539         case Iformfieldtag:
3540                 ff = ((Iformfield*)it)->formfield;
3541                 if(ff->ftype == Ftextarea)
3542                         ty = L"textarea";
3543                 else if(ff->ftype == Fselect)
3544                         ty = L"select";
3545                 else {
3546                         ty = _revlookup(input_tab, NINPUTTAB, ff->ftype);
3547                         if(ty == nil)
3548                                 ty = L"none";
3549                 }
3550                 bi += snprint(buf+bi, nbuf-bi, "Formfield %S, fieldid=%d, formid=%d, name=%S, value=%S",
3551                         ty, ff->fieldid, ff->form->formid, ff->name? ff->name : L"",
3552                         ff->value? ff->value : L"");
3553                 break;
3554
3555         case Itabletag:
3556                 tab = ((Itable*)it)->table;
3557                 bi += snprint(buf+bi, nbuf-bi, "Table tableid=%d, width=", tab->tableid);
3558                 bi += dimprint(buf+bi, nbuf-bi, tab->width);
3559                 bi += snprint(buf+bi, nbuf-bi, ", nrow=%d, ncol=%d, ncell=%d, totw=%d, toth=%d\n",
3560                         tab->nrow, tab->ncol, tab->ncell, tab->totw, tab->toth);
3561                 for(c = tab->cells; c != nil; c = c->next)
3562                         bi += snprint(buf+bi, nbuf-bi, "Cell %d.%d, at (%d,%d) ",
3563                                         tab->tableid, c->cellid, c->row, c->col);
3564                 bi += snprint(buf+bi, nbuf-bi, "End of Table %d", tab->tableid);
3565                 break;
3566
3567         case Ifloattag:
3568                 fl = (Ifloat*)it;
3569                 bi += snprint(buf+bi, nbuf-bi, "Float, x=%d y=%d, side=%S, it=%I",
3570                         fl->x, fl->y, stringalign(fl->side), fl->item);
3571                 bi += snprint(buf+bi, nbuf-bi, "\n\t");
3572                 break;
3573
3574         case Ispacertag:
3575                 p = "";
3576                 switch(((Ispacer*)it)->spkind) {
3577                 case ISPnull:
3578                         p = "null";
3579                         break;
3580                 case ISPvline:
3581                         p = "vline";
3582                         break;
3583                 case ISPhspace:
3584                         p = "hspace";
3585                         break;
3586                 }
3587                 bi += snprint(buf+bi, nbuf-bi, "Spacer %s ", p);
3588                 break;
3589         }
3590         bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%d\n",
3591                         it->width, it->height, it->ascent, it->anchorid);
3592         buf[bi] = 0;
3593         return fmtstrcpy(f, buf);
3594 }
3595
3596 // String version of alignment 'a'
3597 static Rune*
3598 stringalign(int a)
3599 {
3600         Rune*   s;
3601
3602         s = _revlookup(align_tab, NALIGNTAB, a);
3603         if(s == nil)
3604                 s = L"none";
3605         return s;
3606 }
3607
3608 // Put at most nbuf chars of representation of d into buf,
3609 // and return number of characters put
3610 static int
3611 dimprint(char* buf, int nbuf, Dimen d)
3612 {
3613         int     n;
3614         int     k;
3615
3616         n = 0;
3617         n += snprint(buf, nbuf, "%d", dimenspec(d));
3618         k = dimenkind(d);
3619         if(k == Dpercent)
3620                 buf[n++] = '%';
3621         if(k == Drelative)
3622                 buf[n++] = '*';
3623         return n;
3624 }
3625
3626 void
3627 printitems(Item* items, char* msg)
3628 {
3629         Item*   il;
3630
3631         fprint(2, "%s\n", msg);
3632         il = items;
3633         while(il != nil) {
3634                 fprint(2, "%I", il);
3635                 il = il->next;
3636         }
3637 }
3638
3639 static Genattr*
3640 newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events)
3641 {
3642         Genattr* g;
3643
3644         g = (Genattr*)emalloc(sizeof(Genattr));
3645         g->id = id;
3646         g->class = class;
3647         g->style = style;
3648         g->title = title;
3649         g->events = events;
3650         return g;
3651 }
3652
3653 static Formfield*
3654 newformfield(int ftype, int fieldid, Form* form, Rune* name,
3655                 Rune* value, int size, int maxlength, Formfield* link)
3656 {
3657         Formfield* ff;
3658
3659         ff = (Formfield*)emalloc(sizeof(Formfield));
3660         ff->ftype = ftype;
3661         ff->fieldid = fieldid;
3662         ff->form = form;
3663         ff->name = name;
3664         ff->value = value;
3665         ff->size = size;
3666         ff->maxlength = maxlength;
3667         ff->ctlid = -1;
3668         ff->next = link;
3669         return ff;
3670 }
3671
3672 // Transfers ownership of value and display to Option.
3673 static Option*
3674 newoption(int selected, Rune* value, Rune* display, Option* link)
3675 {
3676         Option *o;
3677
3678         o = (Option*)emalloc(sizeof(Option));
3679         o->selected = selected;
3680         o->value = value;
3681         o->display = display;
3682         o->next = link;
3683         return o;
3684 }
3685
3686 static Form*
3687 newform(int formid, Rune* name, Rune* action, int target, int method, Form* link)
3688 {
3689         Form* f;
3690
3691         f = (Form*)emalloc(sizeof(Form));
3692         f->formid = formid;
3693         f->name = name;
3694         f->action = action;
3695         f->target = target;
3696         f->method = method;
3697         f->nfields = 0;
3698         f->fields = nil;
3699         f->next = link;
3700         return f;
3701 }
3702
3703 static Table*
3704 newtable(int tableid, Align align, Dimen width, int border,
3705         int cellspacing, int cellpadding, Background bg, Token* tok, Table* link)
3706 {
3707         Table* t;
3708
3709         t = (Table*)emalloc(sizeof(Table));
3710         t->tableid = tableid;
3711         t->align = align;
3712         t->width = width;
3713         t->border = border;
3714         t->cellspacing = cellspacing;
3715         t->cellpadding = cellpadding;
3716         t->background = bg;
3717         t->caption_place = ALbottom;
3718         t->caption_lay = nil;
3719         t->tabletok = tok;
3720         t->tabletok = nil;
3721         t->next = link;
3722         return t;
3723 }
3724
3725 static Tablerow*
3726 newtablerow(Align align, Background bg, int flags, Tablerow* link)
3727 {
3728         Tablerow* tr;
3729
3730         tr = (Tablerow*)emalloc(sizeof(Tablerow));
3731         tr->align = align;
3732         tr->background = bg;
3733         tr->flags = flags;
3734         tr->next = link;
3735         return tr;
3736 }
3737
3738 static Tablecell*
3739 newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, int hspec,
3740                 Background bg, int flags, Tablecell* link)
3741 {
3742         Tablecell* c;
3743
3744         c = (Tablecell*)emalloc(sizeof(Tablecell));
3745         c->cellid = cellid;
3746         c->lay = nil;
3747         c->rowspan = rowspan;
3748         c->colspan = colspan;
3749         c->align = align;
3750         c->flags = flags;
3751         c->wspec = wspec;
3752         c->hspec = hspec;
3753         c->background = bg;
3754         c->next = link;
3755         return c;
3756 }
3757
3758 static Anchor*
3759 newanchor(int index, Rune* name, Rune* href, int target, Anchor* link)
3760 {
3761         Anchor* a;
3762
3763         a = (Anchor*)emalloc(sizeof(Anchor));
3764         a->index = index;
3765         a->name = name;
3766         a->href = href;
3767         a->target = target;
3768         a->next = link;
3769         return a;
3770 }
3771
3772 static DestAnchor*
3773 newdestanchor(int index, Rune* name, Item* item, DestAnchor* link)
3774 {
3775         DestAnchor* d;
3776
3777         d = (DestAnchor*)emalloc(sizeof(DestAnchor));
3778         d->index = index;
3779         d->name = name;
3780         d->item = item;
3781         d->next = link;
3782         return d;
3783 }
3784
3785 static SEvent*
3786 newscriptevent(int type, Rune* script, SEvent* link)
3787 {
3788         SEvent* ans;
3789
3790         ans = (SEvent*)emalloc(sizeof(SEvent));
3791         ans->type = type;
3792         ans->script = script;
3793         ans->next = link;
3794         return ans;
3795 }
3796
3797 static void
3798 freescriptevents(SEvent* ehead)
3799 {
3800         SEvent* e;
3801         SEvent* nexte;
3802
3803         e = ehead;
3804         while(e != nil) {
3805                 nexte = e->next;
3806                 free(e->script);
3807                 free(e);
3808                 e = nexte;
3809         }
3810 }
3811
3812 static Dimen
3813 makedimen(int kind, int spec)
3814 {
3815         Dimen d;
3816
3817         if(spec&Dkindmask) {
3818                 if(warn)
3819                         fprint(2, "warning: dimension spec too big: %d\n", spec);
3820                 spec = 0;
3821         }
3822         d.kindspec = kind|spec;
3823         return d;
3824 }
3825
3826 int
3827 dimenkind(Dimen d)
3828 {
3829         return (d.kindspec&Dkindmask);
3830 }
3831
3832 int
3833 dimenspec(Dimen d)
3834 {
3835         return (d.kindspec&Dspecmask);
3836 }
3837
3838 static Kidinfo*
3839 newkidinfo(int isframeset, Kidinfo* link)
3840 {
3841         Kidinfo*        ki;
3842
3843         ki = (Kidinfo*)emalloc(sizeof(Kidinfo));
3844         ki->isframeset = isframeset;
3845         if(!isframeset) {
3846                 ki->flags = FRhscrollauto|FRvscrollauto;
3847                 ki->marginw = FRKIDMARGIN;
3848                 ki->marginh = FRKIDMARGIN;
3849                 ki->framebd = 1;
3850         }
3851         ki->next = link;
3852         return ki;
3853 }
3854
3855 static Docinfo*
3856 newdocinfo(void)
3857 {
3858         Docinfo*        d;
3859
3860         d = (Docinfo*)emalloc(sizeof(Docinfo));
3861         resetdocinfo(d);
3862         return d;
3863 }
3864
3865 static void
3866 resetdocinfo(Docinfo* d)
3867 {
3868         memset(d, 0, sizeof(Docinfo));
3869         d->background = makebackground(nil, White);
3870         d->text = Black;
3871         d->link = Blue;
3872         d->vlink = Blue;
3873         d->alink = Blue;
3874         d->target = FTself;
3875         d->chset = ISO_8859_1;
3876         d->scripttype = TextJavascript;
3877         d->frameid = -1;
3878 }
3879
3880 // Use targetmap array to keep track of name <-> targetid mapping.
3881 // Use real malloc(), and never free
3882 static void
3883 targetmapinit(void)
3884 {
3885         int l;
3886
3887         targetmapsize = 10;
3888         l = targetmapsize*sizeof *targetmap;
3889         targetmap = emalloc(l);
3890         memset(targetmap, 0, l);
3891         targetmap[0].key = _Strdup(L"_top");
3892         targetmap[0].val = FTtop;
3893         targetmap[1].key = _Strdup(L"_self");
3894         targetmap[1].val = FTself;
3895         targetmap[2].key = _Strdup(L"_parent");
3896         targetmap[2].val = FTparent;
3897         targetmap[3].key = _Strdup(L"_blank");
3898         targetmap[3].val = FTblank;
3899         ntargets = 4;
3900 }
3901
3902 int
3903 targetid(Rune* s)
3904 {
3905         int i;
3906         int n;
3907
3908         n = _Strlen(s);
3909         if(n == 0)
3910                 return FTself;
3911         for(i = 0; i < ntargets; i++)
3912                 if(_Strcmp(s, targetmap[i].key) == 0)
3913                         return targetmap[i].val;
3914         if(i == targetmapsize) {
3915                 targetmapsize += 10;
3916                 targetmap = erealloc(targetmap, targetmapsize*sizeof(StringInt));
3917         }
3918         targetmap[i].key = _Strdup(s);
3919         targetmap[i].val = i;
3920         ntargets++;
3921         return i;
3922 }
3923
3924 Rune*
3925 targetname(int targid)
3926 {
3927         int i;
3928
3929         for(i = 0; i < ntargets; i++)
3930                 if(targetmap[i].val == targid)
3931                         return targetmap[i].key;
3932         return L"?";
3933 }
3934
3935 // Convert HTML color spec to RGB value, returning dflt if can't.
3936 // Argument is supposed to be a valid HTML color, or "".
3937 // Return the RGB value of the color, using dflt if s
3938 // is nil or an invalid color.
3939 static int
3940 color(Rune* s, int dflt)
3941 {
3942         int v;
3943         Rune* rest;
3944
3945         if(s == nil)
3946                 return dflt;
3947         if(_lookup(color_tab, NCOLORS, s, _Strlen(s), &v))
3948                 return v;
3949         if(s[0] == '#')
3950                 s++;
3951         v = _Strtol(s, &rest, 16);
3952         if(*rest == 0)
3953                 return v;
3954         return dflt;
3955 }
3956
3957 // Debugging
3958
3959 #define HUGEPIX 10000
3960
3961 // A "shallow" validitem, that doesn't follow next links
3962 // or descend into tables.
3963 static int
3964 validitem(Item* i)
3965 {
3966         int ok;
3967         Itext* ti;
3968         Irule* ri;
3969         Iimage* ii;
3970         Ifloat* fi;
3971         int a;
3972
3973         ok = (i->tag >= Itexttag && i->tag <= Ispacertag) &&
3974                 (i->next == nil || validptr(i->next)) &&
3975                 (i->width >= 0 && i->width < HUGEPIX) &&
3976                 (i->height >= 0 && i->height < HUGEPIX) &&
3977                 (i->ascent > -HUGEPIX && i->ascent < HUGEPIX) &&
3978                 (i->anchorid >= 0) &&
3979                 (i->genattr == nil || validptr(i->genattr));
3980         // also, could check state for ridiculous combinations
3981         // also, could check anchorid for within-doc-range
3982         if(ok)
3983                 switch(i->tag) {
3984                 case Itexttag:
3985                         ti = (Itext*)i;
3986                         ok = validStr(ti->s) &&
3987                                 (ti->fnt >= 0 && ti->fnt < NumStyle*NumSize) &&
3988                                 (ti->ul == ULnone || ti->ul == ULunder || ti->ul == ULmid);
3989                         break;
3990                 case Iruletag:
3991                         ri = (Irule*)i;
3992                         ok = (validvalign(ri->align) || validhalign(ri->align)) &&
3993                                 (ri->size >=0 && ri->size < HUGEPIX);
3994                         break;
3995                 case Iimagetag:
3996                         ii = (Iimage*)i;
3997                         ok = (ii->imsrc == nil || validptr(ii->imsrc)) &&
3998                                 (ii->width >= 0 && ii->width < HUGEPIX) &&
3999                                 (ii->height >= 0 && ii->height < HUGEPIX) &&
4000                                 (ii->imwidth >= 0 && ii->imwidth < HUGEPIX) &&
4001                                 (ii->imheight >= 0 && ii->imheight < HUGEPIX) &&
4002                                 (ii->altrep == nil || validStr(ii->altrep)) &&
4003                                 (ii->map == nil || validptr(ii->map)) &&
4004                                 (validvalign(ii->align) || validhalign(ii->align)) &&
4005                                 (ii->nextimage == nil || validptr(ii->nextimage));
4006                         break;
4007                 case Iformfieldtag:
4008                         ok = validformfield(((Iformfield*)i)->formfield);
4009                         break;
4010                 case Itabletag:
4011                         ok = validptr((Itable*)i);
4012                         break;
4013                 case Ifloattag:
4014                         fi = (Ifloat*)i;
4015                         ok = (fi->side == ALleft || fi->side == ALright) &&
4016                                 validitem(fi->item) &&
4017                                 (fi->item->tag == Iimagetag || fi->item->tag == Itabletag);
4018                         break;
4019                 case Ispacertag:
4020                         a = ((Ispacer*)i)->spkind;
4021                         ok = a==ISPnull || a==ISPvline || a==ISPhspace || a==ISPgeneral;
4022                         break;
4023                 default:
4024                         ok = 0;
4025                 }
4026         return ok;
4027 }
4028
4029 // "deep" validation, that checks whole list of items,
4030 // and descends into tables and floated tables.
4031 // nil is ok for argument.
4032 int
4033 validitems(Item* i)
4034 {
4035         int ok;
4036         Item* ii;
4037
4038         ok = 1;
4039         while(i != nil && ok) {
4040                 ok = validitem(i);
4041                 if(ok) {
4042                         if(i->tag == Itabletag) {
4043                                 ok = validtable(((Itable*)i)->table);
4044                         }
4045                         else if(i->tag == Ifloattag) {
4046                                 ii = ((Ifloat*)i)->item;
4047                                 if(ii->tag == Itabletag)
4048                                         ok = validtable(((Itable*)ii)->table);
4049                         }
4050                 }
4051                 if(!ok) {
4052                         fprint(2, "invalid item: %I\n", i);
4053                 }
4054                 i = i->next;
4055         }
4056         return ok;
4057 }
4058
4059 static int
4060 validformfield(Formfield* f)
4061 {
4062         int ok;
4063
4064         ok = (f->next == nil || validptr(f->next)) &&
4065                 (f->ftype >= 0 && f->ftype <= Ftextarea) &&
4066                 f->fieldid >= 0 &&
4067                 (f->form == nil || validptr(f->form)) &&
4068                 (f->name == nil || validStr(f->name)) &&
4069                 (f->value == nil || validStr(f->value)) &&
4070                 (f->options == nil || validptr(f->options)) &&
4071                 (f->image == nil || validitem(f->image)) &&
4072                 (f->events == nil || validptr(f->events));
4073         // when all built, should have f->fieldid < f->form->nfields,
4074         // but this may be called during build...
4075         return ok;
4076 }
4077
4078 // "deep" validation -- checks cell contents too
4079 static int
4080 validtable(Table* t)
4081 {
4082         int ok;
4083         int i, j;
4084         Tablecell* c;
4085
4086         ok = (t->next == nil || validptr(t->next)) &&
4087                 t->nrow >= 0 &&
4088                 t->ncol >= 0 &&
4089                 t->ncell >= 0 &&
4090                 validalign(t->align) &&
4091                 validdimen(t->width) &&
4092                 (t->border >= 0 && t->border < HUGEPIX) &&
4093                 (t->cellspacing >= 0 && t->cellspacing < HUGEPIX) &&
4094                 (t->cellpadding >= 0 && t->cellpadding < HUGEPIX) &&
4095                 validitems(t->caption) &&
4096                 (t->caption_place == ALtop || t->caption_place == ALbottom) &&
4097                 (t->totw >= 0 && t->totw < HUGEPIX) &&
4098                 (t->toth >= 0 && t->toth < HUGEPIX) &&
4099                 (t->tabletok == nil || validptr(t->tabletok));
4100         // during parsing, t->rows has list;
4101         // only when parsing is done is t->nrow set > 0
4102         if(ok && t->nrow > 0 && t->ncol > 0) {
4103                 // table is "finished"
4104                 for(i = 0; i < t->nrow && ok; i++) 
4105                         ok = validtablerow(t->rows+i);
4106                 for(j = 0; j < t->ncol && ok; j++)
4107                         ok = validtablecol(t->cols+j);
4108                 for(c = t->cells; c != nil && ok; c = c->next)
4109                         ok = validtablecell(c);
4110                 for(i = 0; i < t->nrow && ok; i++)
4111                         for(j = 0; j < t->ncol && ok; j++)
4112                                 ok = validptr(t->grid[i][j]);
4113         }
4114         return ok;
4115 }
4116
4117 static int
4118 validvalign(int a)
4119 {
4120         return a == ALnone || a == ALmiddle || a == ALbottom || a == ALtop || a == ALbaseline;
4121 }
4122
4123 static int
4124 validhalign(int a)
4125 {
4126         return a == ALnone || a == ALleft || a == ALcenter || a == ALright ||
4127                         a == ALjustify || a == ALchar;
4128 }
4129
4130 static int
4131 validalign(Align a)
4132 {
4133         return validhalign(a.halign) && validvalign(a.valign);
4134 }
4135
4136 static int
4137 validdimen(Dimen d)
4138 {
4139         int ok;
4140         int s;
4141
4142         ok = 0;
4143         s = d.kindspec&Dspecmask;
4144         switch(d.kindspec&Dkindmask) {
4145         case Dnone:
4146                 ok = s==0;
4147                 break;
4148         case Dpixels:
4149                 ok = s < HUGEPIX;
4150                 break;
4151         case Dpercent:
4152         case Drelative:
4153                 ok = 1;
4154                 break;
4155         }
4156         return ok;
4157 }
4158
4159 static int
4160 validtablerow(Tablerow* r)
4161 {
4162         return (r->cells == nil || validptr(r->cells)) &&
4163                 (r->height >= 0 && r->height < HUGEPIX) &&
4164                 (r->ascent > -HUGEPIX && r->ascent < HUGEPIX) &&
4165                 validalign(r->align);
4166 }
4167
4168 static int
4169 validtablecol(Tablecol* c)
4170 {
4171         return c->width >= 0 && c->width < HUGEPIX
4172                 && validalign(c->align);
4173 }
4174
4175 static int
4176 validtablecell(Tablecell* c)
4177 {
4178         int ok;
4179
4180         ok = (c->next == nil || validptr(c->next)) &&
4181                 (c->nextinrow == nil || validptr(c->nextinrow)) &&
4182                 (c->content == nil || validptr(c->content)) &&
4183                 (c->lay == nil || validptr(c->lay)) &&
4184                 c->rowspan >= 0 &&
4185                 c->colspan >= 0 &&
4186                 validalign(c->align) &&
4187                 validdimen(c->wspec) &&
4188                 c->row >= 0 &&
4189                 c->col >= 0;
4190         if(ok) {
4191                 if(c->content != nil)
4192                         ok = validitems(c->content);
4193         }
4194         return ok;
4195 }
4196
// Crude pointer check: read one byte through p and return 1.
// TODO: a better job of this.
// For now the read itself is the test -- it is expected to
// cause a bomb if p is not valid.
static int
validptr(void* p)
{
	// static sink for the byte that is read
	static char sink;

	sink = *((char*)p);
	return 1;
}
4208
4209 static int
4210 validStr(Rune* s)
4211 {
4212         return s != nil && validptr(s);
4213 }