1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
33 /* This file provides the interface between the main body of
34 * awk and the pattern matching package. It preprocesses
35 * patterns prior to compilation to provide awk-like semantics
36 * to character sequences not supported by the pattern package.
37 * The following conversions are performed:
44 * "\xdddd" -> "\z" where 'z' is the UTF sequence
46 * "\ddd" -> "\o" where 'o' is a char octal value
47 * "\b" -> "\B" where 'B' is backspace
48 * "\t" -> "\T" where 'T' is tab
49 * "\f" -> "\F" where 'F' is form feed
50 * "\n" -> "\N" where 'N' is newline
51 * "\r" -> "\C" where 'C' is carriage return
56 static char re[MAXRE]; /* copy buffer: receives the preprocessed pattern that is passed to regcomp */
59 int patlen; /* number of chars in pattern */
61 #define NPATS 20 /* number of slots in pattern cache */
63 static struct pat_list /* dynamic pattern cache */
70 static int npats; /* cache fill level: number of pattern[] slots in use, at most NPATS */
72 /* Compile a pattern */
80 if (!compile_time) { /* search cache for dynamic pattern */
81 for (i = 0; i < npats; i++)
82 if (!strcmp(pat, pattern[i].re)) {
84 return((void *) pattern[i].program);
87 /* Preprocess Pattern for compilation */
93 quoted(&s, &p, re+MAXRE);
96 else if (!inclass && c == '(' && *s == ')') {
97 if (p < re+MAXRE-2) { /* '()' -> '[]*' */
105 else if (c == '['){ /* '[-' -> '[\-' */
108 if (p < re+MAXRE-2) {
114 } /* '[^-' -> '[^\-'*/
115 else if (*s == '^' && s[1] == '-'){
116 if (p < re+MAXRE-3) {
124 else if (*s == '['){ /* skip '[[' */
130 else if (*s == '^' && s[1] == '[') { /* skip '[^['*/
131 if (p < re+MAXRE-2) {
138 else if (*s == ']') { /* '[]' -> '[]*' */
139 if (p < re+MAXRE-2) {
148 else if (c == '-' && *s == ']') { /* '-]' -> '\-]' */
160 program = regcomp(re); /* compile pattern */
162 if (npats < NPATS) /* Room in cache */
164 else { /* Throw out least used */
165 int use = pattern[0].use;
167 for (j = 1; j < NPATS; j++) {
168 if (pattern[j].use < use) {
169 use = pattern[j].use;
173 xfree(pattern[i].program);
174 xfree(pattern[i].re);
176 pattern[i].re = tostring(pat);
177 pattern[i].program = program;
180 return((void *) program);
183 /* T/F match indication - matched string not exported */
185 match(void *p, char *s, char *)
187 return regexec((Reprog *) p, (char *) s, 0, 0);
190 /* match and delimit the matched string */
192 pmatch(void *p, char *s, char *start)
198 if (regexec((Reprog *) p, (char *) s, &m, 1)) {
208 /* perform a non-empty match */
210 nematch(void *p, char *s, char *start)
212 if (pmatch(p, s, start) == 1 && patlen > 0)
218 /* when parsing regular expressions, a metacharacter such as . counts as one */
219 /* only when it appears literally; the escape \056 is not a metacharacter. */
221 hexstr(char **pp) /* find and eval hex string at pp, return new p */
227 for (i = 0, c = (*pp)[i]; i < 4 && isxdigit(c); i++, c = (*pp)[i]) {
229 n = 16 * n + c - '0';
230 else if ('a' <= c && c <= 'f')
231 n = 16 * n + c - 'a' + 10;
232 else if ('A' <= c && c <= 'F')
233 n = 16 * n + c - 'A' + 10;
239 /* look for awk-specific escape sequences */
241 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* true for '0'..'7'; evaluates c twice, so the argument must have no side effects */
244 quoted(char **s, char **to, char *end) /* handle escaped sequence */
267 if (t < end-1) /* all else must be escaped */
269 if (c == 'x') { /* hexadecimal goo follows */
277 } else if (isoctdigit(c)) { /* \d \dd \ddd */
279 if (isoctdigit(*p)) {
280 c = 8 * c + *p++ - '0';
282 c = 8 * c + *p++ - '0';
293 /* pattern package error handler */
304 FATAL("%s", "regular expression too big");