]> git.lizzy.rs Git - plan9front.git/blob - sys/src/ape/lib/regexp/regexec.c
remove old libregexp files; add headers for upas/bayes
[plan9front.git] / sys / src / ape / lib / regexp / regexec.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5
6 static Resublist sempty;                /* empty set of matches */
7
8 /*
9  *  return      0 if no match
10  *              >0 if a match
11  *              <0 if we ran out of _relist space
12  */
13 static int
14 regexec1(Reprog *progp, /* program to run */
15         char *bol,      /* string to run machine on */
16         Resub *mp,      /* subexpression elements */
17         int ms,         /* number of elements at mp */
18         char *starts,
19         char *eol,
20         wchar_t startchar)
21 {
22         int flag=0;
23         Reinst *inst;
24         Relist *tlp;
25         char *s;
26         int i, checkstart;
27         wchar_t r, *rp, *ep;
28         int n;
29         Relist* tl;             /* This list, next list */
30         Relist* nl;
31         Relist* tle;            /* ends of this and next list */
32         Relist* nle;
33         int match;
34
35         match = 0;
36         checkstart = startchar;
37         sempty.m[0].s.sp = 0;
38         if(mp!=0)
39                 for(i=0; i<ms; i++)
40                         mp[i].s.sp = mp[i].e.ep = 0;
41         _relist[0][0].inst = _relist[1][0].inst = 0;
42
43         /* Execute machine once for each character, including terminal NUL */
44         s = starts;
45         do{
46                 /* fast check for first char */
47                 r = *(unsigned char*)s;
48                 if(checkstart && r != startchar){
49                         s++;
50                         continue;
51                 }
52
53                 if(r < Runeself)
54                         n = 1;
55                 else {
56                         n = mbtowc(&r, s, MB_CUR_MAX);
57                         if (n <= 0)
58                                 n = 1;
59                 }
60
61                 /* switch run lists */
62                 tl = _relist[flag];
63                 tle = _reliste[flag];
64                 nl = _relist[flag^=1];
65                 nle = _reliste[flag];
66                 nl->inst = 0;
67
68                 /* Add first instruction to current list */
69                 if(match == 0){
70                         sempty.m[0].s.sp = s;
71                         _renewthread(tl, progp->startinst, &sempty);
72                 }
73
74                 /* Execute machine until current list is empty */
75                 for(tlp=tl; tlp->inst; tlp++){  /* assignment = */
76                         if(s == eol)
77                                 break;
78
79                         for(inst = tlp->inst; ; inst = inst->l.next){
80                                 switch(inst->type){
81                                 case RUNE:      /* regular character */
82                                         if(inst->r.r == r)
83                                                 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
84                                                         return -1;
85                                         break;
86                                 case LBRA:
87                                         tlp->se.m[inst->r.subid].s.sp = s;
88                                         continue;
89                                 case RBRA:
90                                         tlp->se.m[inst->r.subid].e.ep = s;
91                                         continue;
92                                 case ANY:
93                                         if(r != '\n')
94                                                 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
95                                                         return -1;
96                                         break;
97                                 case ANYNL:
98                                         if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
99                                                 return -1;
100                                         break;
101                                 case BOL:
102                                         if(s == bol || *(s-1) == '\n')
103                                                 continue;
104                                         break;
105                                 case EOL:
106                                         if(r == 0 || r == '\n')
107                                                 continue;
108                                         break;
109                                 case CCLASS:
110                                         ep = inst->r.cp->end;
111                                         for(rp = inst->r.cp->spans; rp < ep; rp += 2)
112                                                 if(r >= rp[0] && r <= rp[1]){
113                                                         if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
114                                                                 return -1;
115                                                         break;
116                                                 }
117                                         break;
118                                 case NCCLASS:
119                                         ep = inst->r.cp->end;
120                                         for(rp = inst->r.cp->spans; rp < ep; rp += 2)
121                                                 if(r >= rp[0] && r <= rp[1])
122                                                         break;
123                                         if(rp == ep)
124                                                 if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
125                                                         return -1;
126                                         break;
127                                 case OR:
128                                         /* evaluate right choice later */
129                                         if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
130                                                 return -1;
131                                         /* efficiency: advance and re-evaluate */
132                                         continue;
133                                 case END:       /* Match! */
134                                         match = 1;
135                                         tlp->se.m[0].e.ep = s;
136                                         if(mp != 0)
137                                                 _renewmatch(mp, ms, &tlp->se);
138                                         break;
139                                 }
140                                 break;
141                         }
142                 }
143                 checkstart = startchar && nl->inst==0;
144                 s += n;
145         }while(r);
146         return match;
147 }
148
149 extern int
150 regexec(Reprog *progp,  /* program to run */
151         char *bol,      /* string to run machine on */
152         Resub *mp,      /* subexpression elements */
153         int ms)         /* number of elements at mp */
154 {
155         char *starts;   /* where to start match */
156         char *eol;      /* where to end match */
157         wchar_t startchar;
158         int rv;
159
160         /*
161          *  use user-specified starting/ending location if specified
162          */
163         starts = bol;
164         eol = 0;
165         if(mp && ms>0){
166                 if(mp->s.sp)
167                         starts = mp->s.sp;
168                 if(mp->e.ep)
169                         eol = mp->e.ep;
170         }
171         startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
172                 ? progp->startinst->r.r : 0;
173
174         /* keep trying till we have enough list space to terminate */
175         for(;;){
176                 if(_relist[0] == 0){
177                         _relist[0] = malloc(2*_relistsize*sizeof(Relist));
178                         _relist[1] = _relist[0] + _relistsize;
179                         _reliste[0] = _relist[0] + _relistsize - 1;
180                         _reliste[1] = _relist[1] + _relistsize - 1;
181                         if(_relist[0] == 0)
182                                 regerror("_relist overflow");
183                 }
184                 rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
185                 if(rv >= 0)
186                         return rv;
187                 free(_relist[0]);
188                 _relist[0] = 0;
189                 _relistsize += LISTINCREMENT;
190         }
191 }