]> git.lizzy.rs Git - uwu-lang.git/blob - src/parse.c
e8f1d381ee92ac7f2fb0ac9b4be224b8fe99ecb6
[uwu-lang.git] / src / parse.c
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "common/err.h"
#include "parse.h"
6
/* Set to 1 to enable the parser trace output and the AST dump guarded by `#if DEBUG`. */
#define DEBUG 0

#if DEBUG
/*
 * Trace helper: prints FN (callers pass __FUNCTION__), the type name of the
 * current expression and, for a function call whose name pointer is non-NULL,
 * that name.
 *
 * Expects a ParseState pointer named `state` to be in scope where it expands.
 * The expansion already ends in a semicolon, so call sites write
 * `DBG(__FUNCTION__)` without one.
 */
#define DBG(FN) printf("%s %s %s\n", FN, expression_types[state->expression->type], (state->expression->type == EX_FNCALL && state->expression->value.str_value) ? state->expression->value.str_value : "");
#else
/* Tracing disabled: DBG expands to nothing. */
#define DBG(FN)
#endif
14
15 #if DEBUG
16 static char *expression_types[EX_FNCALL + 1] = {
17         "uninitialized",
18         "integer-literal",
19         "string-literal",
20         "argument-number",
21         "function-name",
22         "function-call",
23 };
24
25 static void print_expression(ParseExpression *expr, int indent)
26 {
27         for (int i = 0; i < indent; i++)
28                 printf("\t");
29
30         printf("%s ", expression_types[expr->type]);
31
32         if (expr->type == EX_INTLIT || expr->type == EX_ARGNUM)
33                 printf("%d\n", expr->value.int_value);
34         else
35                 printf("\"%s\"\n", expr->value.str_value);
36
37         if (expr->type == EX_FNCALL)
38                 for (size_t i = 0; i < expr->num_children; i++)
39                         print_expression(expr->children[i], indent + 1);
40 }
41
42 static void print_ast(AbstractSyntaxTree tree)
43 {
44         printf("\n[Abstract Syntax Tree]\n\n");
45
46         for (size_t f = 0; f < tree.num_functions; f++) {
47                 ParseFunction *function = tree.functions[f];
48
49                 printf("function %s\n", function->name);
50                 print_expression(function->expression, 1);
51         }
52 }
53 #endif
54
55 static void buffer_append(ParseState *state, char c)
56 {
57         state->buffer = realloc(state->buffer, ++state->buffer_size);
58         state->buffer[state->buffer_size - 1] = c;
59 }
60
61 static char *buffer_terminate(ParseState *state)
62 {
63         buffer_append(state, '\0');
64
65         char *buffer = state->buffer;
66
67         state->buffer = NULL;
68         state->buffer_size = 0;
69
70         return buffer;
71 }
72
73 static void start_arg(ParseState *state)
74 {
75         DBG(__FUNCTION__)
76
77         ParseExpression *parent = state->expression;
78         parent->children = realloc(parent->children, sizeof *parent->children * ++parent->num_children);
79         ParseExpression *child = parent->children[parent->num_children - 1] = malloc(sizeof *child);
80
81         child->type = EX_UNINIT;
82         child->parent = parent;
83
84         state->expression = child;
85 }
86
87 static bool continue_arg(ParseState *state, char c)
88 {
89         DBG(__FUNCTION__)
90
91         if (c == ',')
92                 start_arg(state);
93         else if (c == ')')
94                 state->expression = state->expression->parent;
95         else if (! isspace(c))
96                 return false;
97
98         return true;
99 }
100
101 static bool finish_arg(ParseState *state, char c)
102 {
103         state->expression = state->expression->parent;
104
105         if (state->expression)
106                 continue_arg(state, c);
107         else if (! isspace(c))
108                 return false;
109
110         return true;
111 }
112
113 static bool parse_expression_init(ParseState *state, char c)
114 {
115         DBG(__FUNCTION__)
116
117         if (c == ',')
118                 return false;
119
120         if (isspace(c))
121                 return true;
122
123         switch (c) {
124                 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
125                         state->expression->type = EX_INTLIT;
126                         buffer_append(state, c);
127                         return true;
128
129                 case '"':
130                         state->expression->type = EX_STRLIT;
131                         return true;
132
133                 case '$':
134                         state->expression->type = EX_ARGNUM;
135                         return true;
136
137                 case '&':
138                         state->expression->type = EX_FNNAME;
139                         return true;
140
141                 default:
142                         state->expression->type = EX_FNCALL;
143                         state->expression->value.str_value = NULL;
144                         buffer_append(state, c);
145                         return true;
146         }
147 }
148
149 static bool parse_expression_finish(ParseState *state, char c)
150 {
151         DBG(__FUNCTION__)
152
153         if (state->expression->type == EX_ARGNUM && state->buffer_size == 0)
154                 return false;
155
156         char *buffer_read = buffer_terminate(state);
157
158         if (state->expression->type == EX_INTLIT || state->expression->type == EX_ARGNUM) {
159                 state->expression->value.int_value = atoi(buffer_read);
160                 free(buffer_read);
161         } else {
162                 state->expression->value.str_value = buffer_read;
163         }
164
165         if (state->expression->type == EX_FNCALL) {
166                 state->expression->num_children = 0;
167                 state->expression->children = NULL;
168
169                 if (c == '(')
170                         start_arg(state);
171                 else
172                         return finish_arg(state, c);
173         } else {
174                 if (c == ',' || c == ')')
175                         return finish_arg(state, c);
176                 else
177                         state->expression = state->expression->parent;
178         }
179
180         return true;
181 }
182
183 static bool parse_expression_continue(ParseState *state, char c)
184 {
185         DBG(__FUNCTION__)
186
187         if (state->expression->type == EX_FNCALL && state->expression->value.str_value)
188                 return continue_arg(state, c);
189
190         if (
191                 state->expression->type == EX_STRLIT
192                 ? c == '"'
193                 : (
194                         (state->expression->type == EX_FNCALL && c == '(')
195                         || isspace(c) || c == ',' || c == ')'
196                 )
197         ) {
198                 return parse_expression_finish(state, c);
199         } else {
200                 if ((state->expression->type == EX_INTLIT || state->expression->type == EX_ARGNUM) && ! isdigit(c))
201                         return false;
202
203                 if ((state->expression->type == EX_FNNAME || state->expression->type == EX_FNCALL) && (c == '&' || c == '$'))
204                         return false;
205
206                 buffer_append(state, c);
207         }
208
209         return true;
210 }
211
212 static bool parse_expression(ParseState *state, char c)
213 {
214         DBG(__FUNCTION__)
215
216         return state->expression->type == EX_UNINIT
217                 ? parse_expression_init(state, c)
218                 : parse_expression_continue(state, c);
219 }
220
221 static bool parse_function(ParseState *state, char c)
222 {
223 #if DEBUG
224         printf("%s\n", __FUNCTION__);
225 #endif
226
227         if (c == '\"' || c == '$' || c == ':' || c == ',' || c == '&' || c == '(' || c == ')' || isdigit(c))
228                 return false;
229
230         if (! isspace(c)) {
231                 buffer_append(state, c);
232                 return true;
233         }
234
235         if (state->buffer_size == 0)
236                 return true;
237
238         char *name = buffer_terminate(state);
239
240         state->expression = malloc(sizeof *state->expression);
241         state->expression->type = EX_UNINIT;
242         state->expression->parent = NULL;
243
244         state->tree.functions = realloc(state->tree.functions, sizeof *state->tree.functions * ++state->tree.num_functions);
245         *(state->tree.functions[state->tree.num_functions - 1] = malloc(sizeof(ParseFunction))) = (ParseFunction) {
246                 .name = name,
247                 .expression = state->expression,
248         };
249
250         return true;
251 }
252
253 static bool parse_character(ParseState *state, char c)
254 {
255 #if DEBUG
256         printf("\nparse_character ");
257
258         if (isspace(c))
259                 printf("<SPACE>");
260         else
261                 printf("%c", c);
262
263          printf("\n");
264 #endif
265
266         return state->expression
267                 ? parse_expression(state, c)
268                 : parse_function(state, c);
269 }
270
271 AbstractSyntaxTree parse_file(const char *filename)
272 {
273         ParseState state = {
274                 .tree = {
275                         .num_functions = 0,
276                         .functions = NULL,
277                 },
278                 .buffer_size = 0,
279                 .buffer = NULL,
280                 .expression = NULL,
281         };
282
283         int lines = 1;
284
285         FILE *f = fopen(filename, "r");
286
287         if (! f)
288                 syserror("fopen", f);
289
290 #if DEBUG
291         printf("[File %s]\n[Line %d]\n", filename, lines);
292 #endif
293
294         while (true) {
295                 char c = getc(f);
296
297                 if (feof(f))
298                         break;
299
300                 if (ferror(f))
301                         syserror("getc", f);
302
303                 if (c == '\n')
304                         ++lines;
305
306 #if DEBUG
307                 if (c == '\n')
308                         printf("\n[Line %d]\n", lines);
309 #endif
310
311                 if (! parse_character(&state, c))
312                         error("syntax error: in file %s, line %d\n", filename, lines);
313         }
314
315         if (state.buffer || state.expression)
316                 error("syntax error: at end of file %s\n", filename);
317
318         fclose(f);
319
320 #if DEBUG
321         print_ast(state.tree);
322 #endif
323
324         return state.tree;
325 }