/* Fixed capacities for the parser's statically sized arrays. */
4 #define NSTACK 100 /* parse stack depth (sizes stack[]); html grammar is not recursive, so 30 or so should do */
5 #define NHBUF 8192 /* Input buffer size, in chars (sizes hbuf[]) */
6 #define NPEEKC 3 /* Maximum lookahead: number of characters that can be queued for re-reading (sizes peekc[]) */
7 #define NTOKEN 4096 /* Maximum token length (sizes token[]) */
8 #define NATTR 512 /* Maximum number of attributes of a tag (sizes attr[]) */
/*
 * Give each struct tag a typedef of the same name so the types can be
 * written without the `struct` keyword (e.g. `Hglob *g`, `Pair attr[]`).
 */
9 typedef struct Pair Pair;
10 typedef struct Tag Tag;
11 typedef struct Stack Stack;
12 typedef struct Hglob Hglob;
13 typedef struct Form Form;
14 typedef struct Entity Entity;
28 int tag; /* html tag being processed */
29 int pre; /* in preformatted text? */
30 int font; /* typeface */
31 int size; /* point size of text */
32 int margin; /* left margin position */
33 int indent; /* extra indent at paragraph start */
34 int number; /* paragraph number */
35 int ismap; /* flag of <img> */
36 int isscript; /* inside <script> */
37 int width; /* size of image */
39 char image[NNAME]; /* arg of <img> */
40 char link[NNAME]; /* arg of <a href=...> */
41 char name[NNAME]; /* arg of <a name=...> */
45 * Globals -- these are packed up into a struct that gets passed around
46 * so that multiple parsers can run concurrently
49 char *tp; /* pointer in text buffer */
50 char *name; /* input file name */
51 int hfd; /* input file descriptor */
52 char hbuf[NHBUF]; /* input buffer */
53 char *hbufp; /* next character in buffer */
54 char *ehbuf; /* end of good characters in buffer */
55 int heof; /* end of file flag */
56 int peekc[NPEEKC]; /* characters to re-read */
57 int npeekc; /* # of characters to re-read */
58 char token[NTOKEN]; /* if token type is TEXT */
59 Pair attr[NATTR]; /* tag attribute/value pairs */
60 int nsp; /* # of white-space characters before TEXT token */
61 int spacc; /* place to accumulate more spaces */
62 /* if negative, won't accumulate! */
63 int tag; /* if token type is TAG or END */
64 Stack stack[NSTACK]; /* parse stack */
65 Stack *state; /* parse stack pointer */
66 int lineno; /* input line number */
67 int linebrk; /* flag set if we require a line-break in output */
68 int para; /* flag set if we need an indent at the break */
69 char *text; /* text buffer */
70 char *etext; /* end of text buffer */
71 Form *form; /* data for form under construction */
72 Www *dst; /* where the text goes */
85 * Magic characters corresponding to
86 * literal < followed by / ! or alpha,
/*
 * Takes a parser-state pointer g, an int dir and a string str; returns int.
 * NOTE(review): presumably converts an html length attribute in str to a
 * size, with dir selecting the axis -- confirm against the definition.
 */
121 int strtolength(Hglob *g, int dir, char *str);
124 * Token names for the html parser.
125 * Tag_end corresponds to </end> tags.
126 * Tag_text tags text not in a tag.
127 * Those two must follow the others.
157 Tag_frame, /* rm 5.8.97 */
193 Tag_table, /* rm 3.8.00 */
205 Tag_end, /* also used to indicate unrecognized start tag */
210 END=1, /* tag must have a matching end tag */
211 NOEND, /* tag must not have a matching end tag */
212 OPTEND, /* tag may have a matching end tag */
213 ERR, /* tag must not occur */
/*
 * Form handling and attribute-list helpers.
 * NOTE(review): the descriptions below are inferred from the names and
 * signatures only -- confirm against the definitions.
 */
216 void rdform(Hglob *); /* presumably processes a form-related tag for the given parser state */
217 void endform(Hglob *); /* presumably finishes the form under construction */
218 char *pl_getattr(Pair *, char *); /* presumably looks up an attribute by name in a Pair list; returns a string */
219 int pl_hasattr(Pair *, char *); /* presumably reports whether a Pair list contains the named attribute */
220 void pl_htmloutput(Hglob *, int, char *, Field *); /* presumably emits text to the output; meaning of the int and Field arguments is not visible here */