]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/rc/lex.c
disk/format: implement long name support
[plan9front.git] / sys / src / cmd / rc / lex.c
1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "getflags.h"
5 #include "fns.h"
6 int getnext(void);
7
8 int
9 wordchr(int c)
10 {
11         return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
12 }
13
/*
 * Report whether c may appear in a variable name following '$'.
 * Anything above space qualifies except the ASCII punctuation
 * listed below; note that '*' and '_' are deliberately absent from
 * that list and so are accepted.
 *
 * An earlier version accepted only letters, digits, '_' and '*'.
 */
int
idchr(int c)
{
	static char punct[] = "!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~";

	if(c<=' ')
		return 0;
	return strchr(punct, c)==0;
}
24 int future = EOF;
25 int doprompt = 1;
26 int inquote;
27 int incomm;
28 /*
29  * Look ahead in the input stream
30  */
31
32 int
33 nextc(void)
34 {
35         if(future==EOF)
36                 future = getnext();
37         return future;
38 }
39 /*
40  * Consume the lookahead character.
41  */
42
43 int
44 advance(void)
45 {
46         int c = nextc();
47         lastc = future;
48         future = EOF;
49         return c;
50 }
51 /*
52  * read a character from the input stream
53  */     
54
55 int
56 getnext(void)
57 {
58         int c;
59         static int peekc = EOF;
60         if(peekc!=EOF){
61                 c = peekc;
62                 peekc = EOF;
63                 return c;
64         }
65         if(runq->eof)
66                 return EOF;
67         if(doprompt)
68                 pprompt();
69         c = rchr(runq->cmdfd);
70         if(!inquote && c=='\\'){
71                 c = rchr(runq->cmdfd);
72                 if(c=='\n' && !incomm){         /* don't continue a comment */
73                         doprompt = 1;
74                         c=' ';
75                 }
76                 else{
77                         peekc = c;
78                         c='\\';
79                 }
80         }
81         doprompt = doprompt || c=='\n' || c==EOF;
82         if(c==EOF)
83                 runq->eof++;
84         else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
85         return c;
86 }
87
88 void
89 pprompt(void)
90 {
91         var *prompt;
92         if(runq->iflag){
93                 pstr(err, promptstr);
94                 flush(err);
95                 if(newwdir){
96                         char dir[4096];
97                         int fd;
98                         if((fd=open("/dev/wdir", OWRITE))>=0){
99                                 getwd(dir, sizeof(dir));
100                                 write(fd, dir, strlen(dir));
101                                 close(fd);
102                         }
103                         newwdir = 0;
104                 }
105                 prompt = vlook("prompt");
106                 if(prompt->val && prompt->val->next)
107                         promptstr = prompt->val->next->word;
108                 else
109                         promptstr="\t";
110         }
111         runq->lineno++;
112         doprompt = 0;
113 }
114
115 void
116 skipwhite(void)
117 {
118         int c;
119         for(;;){
120                 c = nextc();
121                 /* Why did this used to be  if(!inquote && c=='#') ?? */
122                 if(c=='#'){
123                         incomm = 1;
124                         for(;;){
125                                 c = nextc();
126                                 if(c=='\n' || c==EOF) {
127                                         incomm = 0;
128                                         break;
129                                 }
130                                 advance();
131                         }
132                 }
133                 if(c==' ' || c=='\t')
134                         advance();
135                 else return;
136         }
137 }
138
/*
 * Skip white space and comments as skipwhite does, and any newlines
 * among them as well.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}
151
/*
 * If the lookahead character is c, consume it and return 1;
 * otherwise leave the input alone and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}
161
162 char*
163 addtok(char *p, int val)
164 {
165         if(p==0)
166                 return 0;
167         if(p==&tok[NTOK-1]){
168                 *p = 0;
169                 yyerror("token buffer too short");
170                 return 0;
171         }
172         *p++=val;
173         return p;
174 }
175
176 char*
177 addutf(char *p, int c)
178 {
179         uchar b, m;
180         int i;
181
182         p = addtok(p, c);       /* 1-byte UTF runes are special */
183         if(onebyte(c))
184                 return p;
185
186         m = 0xc0;
187         b = 0x80;
188         for(i=1; i < UTFmax; i++){
189                 if((c&m) == b)
190                         break;
191                 p = addtok(p, advance());
192                 b = m;
193                 m = (m >> 1)|0x80;
194         }
195         return p;
196 }
197
198 int lastdol;    /* was the last token read '$' or '$#' or '"'? */
199 int lastword;   /* was the last token read a word or compound word terminator? */
200
201 int
202 yylex(void)
203 {
204         int c, d = nextc();
205         char *w = tok;
206         struct tree *t;
207         yylval.tree = 0;
208         /*
209          * Embarassing sneakiness:  if the last token read was a quoted or unquoted
210          * WORD then we alter the meaning of what follows.  If the next character
211          * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
212          * if the next character is the first character of a simple or compound word,
213          * we insert a `^' before it.
214          */
215         if(lastword){
216                 lastword = 0;
217                 if(d=='('){
218                         advance();
219                         strcpy(tok, "( [SUB]");
220                         return SUB;
221                 }
222                 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
223                         strcpy(tok, "^");
224                         return '^';
225                 }
226         }
227         inquote = 0;
228         skipwhite();
229         switch(c = advance()){
230         case EOF:
231                 lastdol = 0;
232                 strcpy(tok, "EOF");
233                 return EOF;
234         case '$':
235                 lastdol = 1;
236                 if(nextis('#')){
237                         strcpy(tok, "$#");
238                         return COUNT;
239                 }
240                 if(nextis('"')){
241                         strcpy(tok, "$\"");
242                         return '"';
243                 }
244                 strcpy(tok, "$");
245                 return '$';
246         case '&':
247                 lastdol = 0;
248                 if(nextis('&')){
249                         skipnl();
250                         strcpy(tok, "&&");
251                         return ANDAND;
252                 }
253                 strcpy(tok, "&");
254                 return '&';
255         case '|':
256                 lastdol = 0;
257                 if(nextis(c)){
258                         skipnl();
259                         strcpy(tok, "||");
260                         return OROR;
261                 }
262         case '<':
263         case '>':
264                 lastdol = 0;
265                 /*
266                  * funny redirection tokens:
267                  *      redir:  arrow | arrow '[' fd ']'
268                  *      arrow:  '<' | '<<' | '>' | '>>' | '|'
269                  *      fd:     digit | digit '=' | digit '=' digit
270                  *      digit:  '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
271                  * some possibilities are nonsensical and get a message.
272                  */
273                 *w++=c;
274                 t = newtree();
275                 switch(c){
276                 case '|':
277                         t->type = PIPE;
278                         t->fd0 = 1;
279                         t->fd1 = 0;
280                         break;
281                 case '>':
282                         t->type = REDIR;
283                         if(nextis(c)){
284                                 t->rtype = APPEND;
285                                 *w++=c;
286                         }
287                         else t->rtype = WRITE;
288                         t->fd0 = 1;
289                         break;
290                 case '<':
291                         t->type = REDIR;
292                         if(nextis(c)){
293                                 t->rtype = HERE;
294                                 *w++=c;
295                         } else if (nextis('>')){
296                                 t->rtype = RDWR;
297                                 *w++=c;
298                         } else t->rtype = READ;
299                         t->fd0 = 0;
300                         break;
301                 }
302                 if(nextis('[')){
303                         *w++='[';
304                         c = advance();
305                         *w++=c;
306                         if(c<'0' || '9'<c){
307                         RedirErr:
308                                 *w = 0;
309                                 yyerror(t->type==PIPE?"pipe syntax"
310                                                 :"redirection syntax");
311                                 return EOF;
312                         }
313                         t->fd0 = 0;
314                         do{
315                                 t->fd0 = t->fd0*10+c-'0';
316                                 *w++=c;
317                                 c = advance();
318                         }while('0'<=c && c<='9');
319                         if(c=='='){
320                                 *w++='=';
321                                 if(t->type==REDIR)
322                                         t->type = DUP;
323                                 c = advance();
324                                 if('0'<=c && c<='9'){
325                                         t->rtype = DUPFD;
326                                         t->fd1 = t->fd0;
327                                         t->fd0 = 0;
328                                         do{
329                                                 t->fd0 = t->fd0*10+c-'0';
330                                                 *w++=c;
331                                                 c = advance();
332                                         }while('0'<=c && c<='9');
333                                 }
334                                 else{
335                                         if(t->type==PIPE)
336                                                 goto RedirErr;
337                                         t->rtype = CLOSE;
338                                 }
339                         }
340                         if(c!=']'
341                         || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
342                                 goto RedirErr;
343                         *w++=']';
344                 }
345                 *w='\0';
346                 yylval.tree = t;
347                 if(t->type==PIPE)
348                         skipnl();
349                 return t->type;
350         case '\'':
351                 lastdol = 0;
352                 lastword = 1;
353                 inquote = 1;
354                 for(;;){
355                         c = advance();
356                         if(c==EOF)
357                                 break;
358                         if(c=='\''){
359                                 if(nextc()!='\'')
360                                         break;
361                                 advance();
362                         }
363                         w = addutf(w, c);
364                 }
365                 if(w!=0)
366                         *w='\0';
367                 t = token(tok, WORD);
368                 t->quoted = 1;
369                 yylval.tree = t;
370                 return t->type;
371         }
372         if(!wordchr(c)){
373                 lastdol = 0;
374                 tok[0] = c;
375                 tok[1]='\0';
376                 return c;
377         }
378         for(;;){
379                 if(c=='*' || c=='[' || c=='?' || c==GLOB)
380                         w = addtok(w, GLOB);
381                 w = addutf(w, c);
382                 c = nextc();
383                 if(lastdol?!idchr(c):!wordchr(c)) break;
384                 advance();
385         }
386
387         lastword = 1;
388         lastdol = 0;
389         if(w!=0)
390                 *w='\0';
391         t = klook(tok);
392         if(t->type!=WORD)
393                 lastword = 0;
394         t->quoted = 0;
395         yylval.tree = t;
396         return t->type;
397 }