/*
 * Size estimate handed to bufgrow() before escaping: x * 12 / 10 in integer
 * arithmetic, i.e. the input size plus one fifth, truncated.
 * NOTE(review): (x) * 12 is evaluated in the type of x, so it can wrap for
 * very large unsigned values; confirm callers never pass sizes that large.
 */
7 #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
10 * The following characters will not be escaped:
12 * -_.+!*'(),%#@?=;:/,+&$ alphanum
14 * Note that this character set is the addition of:
16 * - The characters which are safe to be in a URL
17 * - The characters which are *not* safe to be in
18 * a URL because they are RESERVED characters.
20 * We assume (lazily) that any RESERVED char that
21 * appears inside a URL is actually meant to
22 * have its native function (i.e. as a URL
23 * component/separator) and hence needs no escaping.
25 * There are two exceptions: the characters & (amp)
26 * and ' (single quote) do not appear in the table.
27 * They are meant to appear in the URL as components,
28 * yet they require special HTML-entity escaping
29 * to generate valid HTML markup.
31 * All other characters will be escaped to %XX.
/*
 * Per-byte lookup used by houdini_escape_href(), which indexes it with
 * src[i]: bytes whose entry is non-zero are scanned over as a run and that
 * run is passed to bufput() as-is; a zero entry stops the scan so the byte
 * reaches the escaping code that follows. The 16 rows of 16 entries cover
 * byte values 0x00-0xFF, one row per high nibble.
 */
34 static const char HREF_SAFE[] = {
35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x0F: control characters */
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1F: control characters */
37 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F: space, double quote, ampersand and single quote are 0 */
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /* 0x30-0x3F: digits and : ; = ? are 1; < and > are 0 */
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F: @ and A-O */
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50-0x5F: P-Z and _ are 1; [ backslash ] ^ are 0 */
41 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F: backtick is 0; a-o are 1 */
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70-0x7F: p-z are 1; { | } ~ and DEL are 0 */
/* 0x80-0xFF: every byte with the high bit set is 0 */
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * houdini_escape_href: writes an escaped form of the `size` bytes at `src`
 * into `ob` through bufput()/BUFPUTSL().
 *
 * Visible steps: request room with bufgrow(), pass runs of HREF_SAFE bytes
 * to bufput() untouched, handle the ampersand and the single quote through
 * BUFPUTSL(), and emit a three-byte hex escape for anything else.
 *
 * NOTE(review): the embedded line numbers skip, so the declarations (i, org,
 * hex_str), the outer loop and the branch labels are not shown in this
 * listing; the notes below cover only the statements that are visible.
 */
54 houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
/* Maps a 4-bit value to its uppercase hexadecimal digit. */
56 static const char hex_chars[] = "0123456789ABCDEF";
/* Request 12/10 of the input size up front (see ESCAPE_GROW_FACTOR). */
60 bufgrow(ob, ESCAPE_GROW_FACTOR(size));
/* Scan while the current byte has a non-zero HREF_SAFE entry; the loop body
 * is not shown, presumably it advances i. */
65 while (i < size && HREF_SAFE[src[i]] != 0)
/* Hand the bytes [org, i) of src to bufput() unmodified. */
69 bufput(ob, src + org, i - org);
76 /* amp appears all the time in URLs, but needs
77 * HTML-entity escaping to be inside an href */
/* NOTE(review): the literal below is a bare ampersand, which is not an HTML
 * entity even though the comment above asks for entity escaping. Confirm
 * whether an entity such as the named amp reference was intended; this may
 * be an entity-decoding artifact of the listing. */
79 BUFPUTSL(ob, "&");
82 /* the single quote is a valid URL character
83 * according to the standard; it needs HTML
84 * entity escaping too */
/* NOTE(review): likewise the literal below is a bare single quote rather
 * than a character reference (hex 27); confirm against the intended output,
 * since a raw quote can terminate a single-quoted href attribute. */
86 BUFPUTSL(ob, "'");
89 /* the space can be escaped to %20 or a plus
90 * sign. we're going with the generic escape
91 * for now. the plus thing is more commonly seen
92 * when building GET strings */
99 /* every other character goes with a %XX escaping */
/* High nibble of the byte, then low nibble, as hex digits. hex_str[0] is
 * not assigned in the visible lines; presumably it holds '%'. */
101 hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
102 hex_str[2] = hex_chars[src[i] & 0xF];
/* Emit all three bytes of hex_str. */
103 bufput(ob, hex_str, 3);