]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/mothra/html.h
fix ref822 again: remove uniqarray(), fix case with many entries in 'n'.
[plan9front.git] / sys / src / cmd / mothra / html.h
/*
 * Fixed limits used by the html parser
 */
#define	NSTACK	100		/* parse stack depth; the grammar is not recursive, so 30 or so would do */
#define	NHBUF	(8*1024)	/* bytes in the input buffer */
#define	NPEEKC	3		/* most characters that can be queued for re-reading */
#define	NTOKEN	(64*1024)	/* longest token */
#define	NATTR	512		/* most attributes one tag may carry */
/*
 * Short names for the structure types used by the parser.
 * Form is only named here; its definition is not in this header.
 */
typedef struct Entity	Entity;
typedef struct Form	Form;
typedef struct Hglob	Hglob;
typedef struct Pair	Pair;
typedef struct Stack	Stack;
typedef struct Tag	Tag;
/*
 * A name/value association; Hglob keeps an array of these
 * for the attributes of the current tag.
 */
struct Pair
{
	char	*name;
	char	*value;
};
19 struct Entity{
20         char *name;
21         Rune value;
22 };
/*
 * One entry of the tag table.
 */
struct Tag
{
	char	*name;		/* tag name */
	int	action;		/* NOTE(review): presumably one of END, NOEND, OPTEND, ERR -- confirm */
};
/*
 * One level of the parse stack: the formatting state that
 * goes with the tag being processed.
 */
struct Stack
{
	int	tag;		/* html tag being processed */
	int	pre;		/* nonzero inside preformatted text */
	int	font;		/* typeface */
	int	size;		/* point size of text */
	int	sub;		/* negative: superscript, positive: subscript */
	int	margin;		/* position of the left margin */
	int	indent;		/* additional indent at the start of a paragraph */
	int	number;		/* paragraph number */
	int	ismap;		/* flag of <img> */
	int	isscript;	/* nonzero inside <script> */
	int	strike;		/* flag of <strike> */
	int	width;		/* image size: width ... */
	int	height;		/* ... and height */
	char	*image;		/* argument of <img> */
	char	*link;		/* argument of <a href=...> */
	char	*name;		/* argument of <a name=...> */
};
45
46 /*
47  * Globals -- these are packed up into a struct that gets passed around
48  * so that multiple parsers can run concurrently
49  */
50 struct Hglob{
51         char *tp;               /* pointer in text buffer */
52         char *name;             /* input file name */
53         int hfd;                /* input file descriptor */
54         char hbuf[NHBUF];       /* input buffer */
55         char *hbufp;            /* next character in buffer */
56         char *ehbuf;            /* end of good characters in buffer */
57         int heof;               /* end of file flag */
58         int peekc[NPEEKC];      /* characters to re-read */
59         int npeekc;             /* # of characters to re-read */
60         char token[NTOKEN];     /* if token type is TEXT */
61         Pair attr[NATTR];       /* tag attribute/value pairs */
62         int nsp;                /* # of white-space characters before TEXT token */
63         int spacc;              /* place to accumulate more spaces */
64                                 /* if negative, won't accumulate! */
65         int tag;                /* if token type is TAG or END */
66         Stack stack[NSTACK];    /* parse stack */
67         Stack *state;           /* parse stack pointer */
68         int lineno;             /* input line number */
69         int linebrk;            /* flag set if we require a line-break in output */
70         int para;               /* flag set if we need an indent at the break */
71         char *text;             /* text buffer */
72         char *etext;            /* end of text buffer */
73         Form *form;             /* data for form under construction */
74         Www *dst;               /* where the text goes */
75 };
76
/*
 * Kinds of token
 */
enum
{
	TAG	= 1,
	ENDTAG	= 2,
	TEXT	= 3,
};
85
/*
 * Magic character values corresponding to
 *	a literal < followed by / ! or alpha,
 *	a literal > and
 *	end of file
 *
 * EOF is parenthesized so that the negative constant stays a single
 * operand wherever the macro is expanded.
 */
#define	STAG	65536
#define	ETAG	65537
#define	EOF	(-1)
95
/*
 * Typefaces
 */
enum
{
	ROMAN	= 0,
	ITALIC	= 1,
	BOLD	= 2,
	CWIDTH	= 3,
};
105
/*
 * Sizes of font
 */
enum
{
	SMALL		= 0,
	NORMAL		= 1,
	LARGE		= 2,
	ENORMOUS	= 3,
};
115
/*
 * Direction along which a length is measured
 */
enum
{
	HORIZ	= 0,
	VERT	= 1,
};
123 int strtolength(Hglob *g, int dir, char *str);
124
/*
 * Token names for the html parser, numbered consecutively from 0.
 * Tag_end stands for </end> tags and Tag_text for text outside
 * any tag; these two must stay after all the others.
 * The enumerators are grouped by leading letter, but their order
 * is significant and is not strictly alphabetical.
 */
enum
{
	Tag_comment = 0,

	Tag_a, Tag_abbr, Tag_acronym, Tag_address, Tag_applet, Tag_audio,
	Tag_b, Tag_base, Tag_blockquot, Tag_body, Tag_br, Tag_button,
	Tag_center, Tag_cite, Tag_code,
	Tag_dd, Tag_del, Tag_div, Tag_dfn, Tag_dir, Tag_dl, Tag_dt,
	Tag_em, Tag_embed,
	Tag_font, Tag_form,
	Tag_frame,	/* rm 5.8.97 */
	Tag_h1, Tag_h2, Tag_h3, Tag_h4, Tag_h5, Tag_h6,
	Tag_head, Tag_hr, Tag_html,
	Tag_i, Tag_iframe, Tag_img, Tag_image, Tag_input, Tag_ins, Tag_isindex,
	Tag_kbd, Tag_key,
	Tag_li, Tag_link, Tag_listing,
	Tag_menu, Tag_meta,
	Tag_nextid,
	Tag_object, Tag_ol, Tag_option,
	Tag_p, Tag_plaintext, Tag_pre,
	Tag_s, Tag_samp, Tag_script, Tag_select, Tag_span, Tag_strike,
	Tag_strong, Tag_style, Tag_sub, Tag_sup, Tag_source,
	Tag_table,	/* rm 3.8.00 */
	Tag_td, Tag_th, Tag_textarea, Tag_title, Tag_tr, Tag_tt,
	Tag_u, Tag_ul,
	Tag_var, Tag_video,
	Tag_wbr,
	Tag_xmp,

	Tag_end,	/* also used to indicate unrecognized start tag */
	Tag_text,
};
219 enum{
220         NTAG=Tag_end,
221         END=1,  /* tag must have a matching end tag */
222         NOEND,  /* tag must not have a matching end tag */
223         OPTEND, /* tag may have a matching end tag */
224         ERR,            /* tag must not occur */
225 };
226 Tag tag[];
227 void rdform(Hglob *);
228 void endform(Hglob *);
229 char *pl_getattr(Pair *, char *);
230 int pl_hasattr(Pair *, char *);
231 void pl_htmloutput(Hglob *, int, char *, Field *);
232
233 #pragma incomplete Form
234 #pragma incomplete Field
235