]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libregexp/regexec.c
9bootfat: rename open() to fileinit and make it static as its really a internal funct...
[plan9front.git] / sys / src / libregexp / regexec.c
1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5
6
7 /*
8  *  return      0 if no match
9  *              >0 if a match
10  *              <0 if we ran out of _relist space
11  */
12 static int
13 regexec1(Reprog *progp, /* program to run */
14         char *bol,      /* string to run machine on */
15         Resub *mp,      /* subexpression elements */
16         int ms,         /* number of elements at mp */
17         Reljunk *j
18 )
19 {
20         int flag=0;
21         Reinst *inst;
22         Relist *tlp;
23         char *s;
24         int i, checkstart;
25         Rune r, *rp, *ep;
26         int n;
27         Relist* tl;             /* This list, next list */
28         Relist* nl;
29         Relist* tle;            /* ends of this and next list */
30         Relist* nle;
31         int match;
32         char *p;
33
34         match = 0;
35         checkstart = j->starttype;
36         if(mp)
37                 for(i=0; i<ms; i++) {
38                         mp[i].sp = 0;
39                         mp[i].ep = 0;
40                 }
41         j->relist[0][0].inst = 0;
42         j->relist[1][0].inst = 0;
43
44         /* Execute machine once for each character, including terminal NUL */
45         s = j->starts;
46         do{
47                 /* fast check for first char */
48                 if(checkstart) {
49                         switch(j->starttype) {
50                         case RUNE:
51                                 p = utfrune(s, j->startchar);
52                                 if(p == 0 || s == j->eol)
53                                         return match;
54                                 s = p;
55                                 break;
56                         case BOL:
57                                 if(s == bol)
58                                         break;
59                                 p = utfrune(s, '\n');
60                                 if(p == 0 || s == j->eol)
61                                         return match;
62                                 s = p+1;
63                                 break;
64                         }
65                 }
66                 r = *(uchar*)s;
67                 if(r < Runeself)
68                         n = 1;
69                 else
70                         n = chartorune(&r, s);
71
72                 /* switch run lists */
73                 tl = j->relist[flag];
74                 tle = j->reliste[flag];
75                 nl = j->relist[flag^=1];
76                 nle = j->reliste[flag];
77                 nl->inst = 0;
78
79                 /* Add first instruction to current list */
80                 if(match == 0)
81                         _renewemptythread(tl, progp->startinst, ms, s);
82
83                 /* Execute machine until current list is empty */
84                 for(tlp=tl; tlp->inst; tlp++){  /* assignment = */
85                         for(inst = tlp->inst; ; inst = inst->next){
86                                 switch(inst->type){
87                                 case RUNE:      /* regular character */
88                                         if(inst->r == r){
89                                                 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
90                                                         return -1;
91                                         }
92                                         break;
93                                 case LBRA:
94                                         tlp->se.m[inst->subid].sp = s;
95                                         continue;
96                                 case RBRA:
97                                         tlp->se.m[inst->subid].ep = s;
98                                         continue;
99                                 case ANY:
100                                         if(r != '\n')
101                                                 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
102                                                         return -1;
103                                         break;
104                                 case ANYNL:
105                                         if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
106                                                         return -1;
107                                         break;
108                                 case BOL:
109                                         if(s == bol || *(s-1) == '\n')
110                                                 continue;
111                                         break;
112                                 case EOL:
113                                         if(s == j->eol || r == 0 || r == '\n')
114                                                 continue;
115                                         break;
116                                 case CCLASS:
117                                         ep = inst->cp->end;
118                                         for(rp = inst->cp->spans; rp < ep; rp += 2)
119                                                 if(r >= rp[0] && r <= rp[1]){
120                                                         if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
121                                                                 return -1;
122                                                         break;
123                                                 }
124                                         break;
125                                 case NCCLASS:
126                                         ep = inst->cp->end;
127                                         for(rp = inst->cp->spans; rp < ep; rp += 2)
128                                                 if(r >= rp[0] && r <= rp[1])
129                                                         break;
130                                         if(rp == ep)
131                                                 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
132                                                         return -1;
133                                         break;
134                                 case OR:
135                                         /* evaluate right choice later */
136                                         if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
137                                                 return -1;
138                                         /* efficiency: advance and re-evaluate */
139                                         continue;
140                                 case END:       /* Match! */
141                                         match = 1;
142                                         tlp->se.m[0].ep = s;
143                                         if(mp != 0)
144                                                 _renewmatch(mp, ms, &tlp->se);
145                                         break;
146                                 }
147                                 break;
148                         }
149                 }
150                 if(s == j->eol)
151                         break;
152                 checkstart = j->starttype && nl->inst==0;
153                 s += n;
154         }while(r);
155         return match;
156 }
157
158 static int
159 regexec2(Reprog *progp, /* program to run */
160         char *bol,      /* string to run machine on */
161         Resub *mp,      /* subexpression elements */
162         int ms,         /* number of elements at mp */
163         Reljunk *j
164 )
165 {
166         int rv;
167         Relist *relist0, *relist1;
168
169         /* mark space */
170         relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
171         if(relist0 == nil)
172                 return -1;
173         relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
174         if(relist1 == nil){
175                 free(relist1);
176                 return -1;
177         }
178         j->relist[0] = relist0;
179         j->relist[1] = relist1;
180         j->reliste[0] = relist0 + BIGLISTSIZE - 2;
181         j->reliste[1] = relist1 + BIGLISTSIZE - 2;
182
183         rv = regexec1(progp, bol, mp, ms, j);
184         free(relist0);
185         free(relist1);
186         return rv;
187 }
188
189 extern int
190 regexec(Reprog *progp,  /* program to run */
191         char *bol,      /* string to run machine on */
192         Resub *mp,      /* subexpression elements */
193         int ms)         /* number of elements at mp */
194 {
195         Reljunk j;
196         Relist relist0[LISTSIZE], relist1[LISTSIZE];
197         int rv;
198
199         /*
200          *  use user-specified starting/ending location if specified
201          */
202         j.starts = bol;
203         j.eol = 0;
204         if(mp && ms>0){
205                 if(mp->sp)
206                         j.starts = mp->sp;
207                 if(mp->ep)
208                         j.eol = mp->ep;
209         }
210         j.starttype = 0;
211         j.startchar = 0;
212         if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
213                 j.starttype = RUNE;
214                 j.startchar = progp->startinst->r;
215         }
216         if(progp->startinst->type == BOL)
217                 j.starttype = BOL;
218
219         /* mark space */
220         j.relist[0] = relist0;
221         j.relist[1] = relist1;
222         j.reliste[0] = relist0 + nelem(relist0) - 2;
223         j.reliste[1] = relist1 + nelem(relist1) - 2;
224
225         rv = regexec1(progp, bol, mp, ms, &j);
226         if(rv >= 0)
227                 return rv;
228         rv = regexec2(progp, bol, mp, ms, &j);
229         if(rv >= 0)
230                 return rv;
231         return -1;
232 }