/*
 * Fixed capacities for the parser's statically sized arrays.
 */
4 #define NSTACK 100 /* parse stack entries; html grammar is not recursive, so 30 or so should do */
5 #define NHBUF 8192 /* Input buffer size (length of the hbuf char array) */
6 #define NPEEKC 3 /* Maximum lookahead (capacity of the peekc re-read array) */
7 #define NTOKEN 65536 /* Maximum token length (size of the token buffer) */
8 #define NATTR 512 /* Maximum number of attributes of a tag (length of the attr array) */
/*
 * Give each struct tag a same-named typedef so later declarations
 * can refer to the type (e.g. through a pointer) without writing
 * the struct keyword.
 */
9 typedef struct Pair Pair;
10 typedef struct Tag Tag;
11 typedef struct Stack Stack;
12 typedef struct Hglob Hglob;
13 typedef struct Form Form;
14 typedef struct Entity Entity;
28 int tag; /* html tag being processed */
29 int pre; /* in preformatted text? */
30 int font; /* typeface */
31 int size; /* point size of text */
32 int sub; /* < 0 superscript, > 0 subscript */
33 int margin; /* left margin position */
34 int indent; /* extra indent at paragraph start */
35 int number; /* paragraph number */
36 int ismap; /* flag of <img> */
37 int isscript; /* inside <script> */
38 int strike; /* flag of <strike> */
39 int width; /* size of image */
41 char *image; /* arg of <img> */
42 char *link; /* arg of <a href=...> */
43 char *name; /* arg of <a name=...> */
47 * Globals -- these are packed up into a struct that gets passed around
48 * so that multiple parsers can run concurrently
51 char *tp; /* pointer in text buffer */
52 char *name; /* input file name */
53 int hfd; /* input file descriptor */
54 char hbuf[NHBUF]; /* input buffer */
55 char *hbufp; /* next character in buffer */
56 char *ehbuf; /* end of good characters in buffer */
57 int heof; /* end of file flag */
58 int peekc[NPEEKC]; /* characters to re-read */
59 int npeekc; /* # of characters to re-read */
60 char token[NTOKEN]; /* if token type is TEXT */
61 Pair attr[NATTR]; /* tag attribute/value pairs */
62 int nsp; /* # of white-space characters before TEXT token */
63 int spacc; /* place to accumulate more spaces */
64 /* if negative, won't accumulate! */
65 int tag; /* if token type is TAG or END */
66 Stack stack[NSTACK]; /* parse stack */
67 Stack *state; /* parse stack pointer */
68 int lineno; /* input line number */
69 int linebrk; /* flag set if we require a line-break in output */
70 int para; /* flag set if we need an indent at the break */
71 char *text; /* text buffer */
72 char *etext; /* end of text buffer */
73 Form *form; /* data for form under construction */
74 Www *dst; /* where the text goes */
87 * Magic characters corresponding to
88 * literal < followed by / ! or alpha,
/*
 * NOTE(review): definition is not visible here. Presumably converts the
 * length string str to an int, with dir selecting the direction the
 * length applies to -- confirm against the definition.
 */
123 int strtolength(Hglob *g, int dir, char *str);
126 * Token names for the html parser.
127 * Tag_end corresponds to </end> tags.
128 * Tag_text tags text not in a tag.
129 * Those two must follow the others.
160 Tag_frame, /* rm 5.8.97 */
202 Tag_table, /* rm 3.8.00 */
216 Tag_end, /* also used to indicate unrecognized start tag */
221 END=1, /* tag must have a matching end tag */
222 NOEND, /* tag must not have a matching end tag */
223 OPTEND, /* tag may have a matching end tag */
224 ERR, /* tag must not occur */
/*
 * Form and attribute-list helpers; definitions are not visible here.
 * NOTE(review): the per-function notes below are inferred from the
 * names and signatures only -- confirm against the definitions.
 */
227 void rdform(Hglob *); /* presumably handles a form-related tag for the given parser state */
228 void endform(Hglob *); /* presumably finishes the form under construction */
229 char *pl_getattr(Pair *, char *); /* presumably looks up the named attribute's value in a Pair list */
230 int pl_hasattr(Pair *, char *); /* presumably reports whether the named attribute is present */
231 void pl_htmloutput(Hglob *, int, char *, Field *); /* takes parser state, an int, a string and a Field; meaning of each not visible here */
/*
 * Mark Form and Field as intentionally incomplete types.
 * NOTE(review): #pragma incomplete is a Plan 9 C compiler directive
 * for opaque types whose full definition lives in another source
 * file -- confirm against the compiler manual.
 */
233 #pragma incomplete Form
234 #pragma incomplete Field