]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libregexp/rregexec.c
Import sources from 2011-03-30 iso image
[plan9front.git] / sys / src / libregexp / rregexec.c
1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5
6 /*
7  *  return      0 if no match
8  *              >0 if a match
9  *              <0 if we ran out of _relist space
10  */
11 static int
12 rregexec1(Reprog *progp,        /* program to run */
13         Rune *bol,              /* string to run machine on */
14         Resub *mp,              /* subexpression elements */
15         int ms,                 /* number of elements at mp */
16         Reljunk *j)
17 {
18         int flag=0;
19         Reinst *inst;
20         Relist *tlp;
21         Rune *s;
22         int i, checkstart;
23         Rune r, *rp, *ep;
24         Relist* tl;             /* This list, next list */
25         Relist* nl;
26         Relist* tle;            /* ends of this and next list */
27         Relist* nle;
28         int match;
29         Rune *p;
30
31         match = 0;
32         checkstart = j->startchar;
33         if(mp)
34                 for(i=0; i<ms; i++) {
35                         mp[i].rsp = 0;
36                         mp[i].rep = 0;
37                 }
38         j->relist[0][0].inst = 0;
39         j->relist[1][0].inst = 0;
40
41         /* Execute machine once for each character, including terminal NUL */
42         s = j->rstarts;
43         do{
44                 /* fast check for first char */
45                 if(checkstart) {
46                         switch(j->starttype) {
47                         case RUNE:
48                                 p = runestrchr(s, j->startchar);
49                                 if(p == 0 || s == j->reol)
50                                         return match;
51                                 s = p;
52                                 break;
53                         case BOL:
54                                 if(s == bol)
55                                         break;
56                                 p = runestrchr(s, '\n');
57                                 if(p == 0 || s == j->reol)
58                                         return match;
59                                 s = p+1;
60                                 break;
61                         }
62                 }
63
64                 r = *s;
65
66                 /* switch run lists */
67                 tl = j->relist[flag];
68                 tle = j->reliste[flag];
69                 nl = j->relist[flag^=1];
70                 nle = j->reliste[flag];
71                 nl->inst = 0;
72
73                 /* Add first instruction to current list */
74                 _rrenewemptythread(tl, progp->startinst, ms, s);
75
76                 /* Execute machine until current list is empty */
77                 for(tlp=tl; tlp->inst; tlp++){
78                         for(inst=tlp->inst; ; inst = inst->next){
79                                 switch(inst->type){
80                                 case RUNE:      /* regular character */
81                                         if(inst->r == r)
82                                                 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
83                                                         return -1;
84                                         break;
85                                 case LBRA:
86                                         tlp->se.m[inst->subid].rsp = s;
87                                         continue;
88                                 case RBRA:
89                                         tlp->se.m[inst->subid].rep = s;
90                                         continue;
91                                 case ANY:
92                                         if(r != '\n')
93                                                 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
94                                                         return -1;
95                                         break;
96                                 case ANYNL:
97                                         if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
98                                                         return -1;
99                                         break;
100                                 case BOL:
101                                         if(s == bol || *(s-1) == '\n')
102                                                 continue;
103                                         break;
104                                 case EOL:
105                                         if(s == j->reol || r == 0 || r == '\n')
106                                                 continue;
107                                         break;
108                                 case CCLASS:
109                                         ep = inst->cp->end;
110                                         for(rp = inst->cp->spans; rp < ep; rp += 2)
111                                                 if(r >= rp[0] && r <= rp[1]){
112                                                         if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
113                                                                 return -1;
114                                                         break;
115                                                 }
116                                         break;
117                                 case NCCLASS:
118                                         ep = inst->cp->end;
119                                         for(rp = inst->cp->spans; rp < ep; rp += 2)
120                                                 if(r >= rp[0] && r <= rp[1])
121                                                         break;
122                                         if(rp == ep)
123                                                 if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
124                                                         return -1;
125                                         break;
126                                 case OR:
127                                         /* evaluate right choice later */
128                                         if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
129                                                 return -1;
130                                         /* efficiency: advance and re-evaluate */
131                                         continue;
132                                 case END:       /* Match! */
133                                         match = 1;
134                                         tlp->se.m[0].rep = s;
135                                         if(mp != 0)
136                                                 _renewmatch(mp, ms, &tlp->se);
137                                         break;
138                                 }
139                                 break;
140                         }
141                 }
142                 if(s == j->reol)
143                         break;
144                 checkstart = j->startchar && nl->inst==0;
145                 s++;
146         }while(r);
147         return match;
148 }
149
150 static int
151 rregexec2(Reprog *progp,        /* program to run */
152         Rune *bol,      /* string to run machine on */
153         Resub *mp,      /* subexpression elements */
154         int ms,         /* number of elements at mp */
155         Reljunk *j
156 )
157 {
158         Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
159
160         /* mark space */
161         j->relist[0] = relist0;
162         j->relist[1] = relist1;
163         j->reliste[0] = relist0 + nelem(relist0) - 2;
164         j->reliste[1] = relist1 + nelem(relist1) - 2;
165
166         return rregexec1(progp, bol, mp, ms, j);
167 }
168
169 extern int
170 rregexec(Reprog *progp, /* program to run */
171         Rune *bol,      /* string to run machine on */
172         Resub *mp,      /* subexpression elements */
173         int ms)         /* number of elements at mp */
174 {
175         Reljunk j;
176         Relist relist0[LISTSIZE], relist1[LISTSIZE];
177         int rv;
178
179         /*
180          *  use user-specified starting/ending location if specified
181          */
182         j.rstarts = bol;
183         j.reol = 0;
184         if(mp && ms>0){
185                 if(mp->sp)
186                         j.rstarts = mp->rsp;
187                 if(mp->ep)
188                         j.reol = mp->rep;
189         }
190         j.starttype = 0;
191         j.startchar = 0;
192         if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
193                 j.starttype = RUNE;
194                 j.startchar = progp->startinst->r;
195         }
196         if(progp->startinst->type == BOL)
197                 j.starttype = BOL;
198
199         /* mark space */
200         j.relist[0] = relist0;
201         j.relist[1] = relist1;
202         j.reliste[0] = relist0 + nelem(relist0) - 2;
203         j.reliste[1] = relist1 + nelem(relist1) - 2;
204
205         rv = rregexec1(progp, bol, mp, ms, &j);
206         if(rv >= 0)
207                 return rv;
208         rv = rregexec2(progp, bol, mp, ms, &j);
209         if(rv >= 0)
210                 return rv;
211         return -1;
212 }