]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libjson/json.c
libjson: fix for 21 bit runes, implement utf-16 surrogates
[plan9front.git] / sys / src / libjson / json.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <json.h>
5
6 typedef struct Lex Lex;
7
/*
 * Token types stored in Lex.t by lex().
 * Punctuation tokens ({ } [ ] : ,) are stored as their own rune
 * value, so the named tokens are numbered above Runemax to avoid
 * colliding with any rune.  TEOF is 0, the rune lex() sees at the
 * terminating NUL of the input.
 */
enum {
	TEOF,
	TSTRING = Runemax+1,
	TNUM,
	TNULL,
	TFALSE,
	TTRUE,
};
16
/*
 * Lexer state for one jsonparse() call.
 */
struct Lex
{
	char *s;		/* read cursor into the input string */
	int t;			/* type of the token produced by the last lex() */
	double n;		/* value of the token when t == TNUM */
	char buf[4096];		/* NUL-terminated text of a string or literal token */
	Rune peeked;		/* one rune of lookahead; 0 means nothing is buffered */
	jmp_buf jmp;		/* target lex() longjmps to, see canjmp */
	int canjmp;		/* when set, lex() longjmps to jmp if the next token is ']' */
};
27
28 static Rune
29 getch(Lex *l)
30 {
31         Rune r;
32
33         if(l->peeked){
34                 r = l->peeked;
35                 l->peeked = 0;
36                 return r;
37         }
38         l->s += chartorune(&r, l->s);
39         return r;
40 }
41
42 static Rune
43 peekch(Lex *l)
44 {
45         if(!l->peeked)
46                 l->peeked = getch(l);
47         return l->peeked;
48 }
49
50 static int
51 fixsurrogate(Rune *rp, Rune r2)
52 {
53         Rune r1;
54
55         r1 = *rp;
56         if(r1 >= 0xD800 && r1 <= 0xDBFF){
57                 if(r2 >= 0xDC00 && r2 <= 0xDFFF){
58                         *rp = 0x10000 + (((r1 - 0xD800)<<10) | (r2 - 0xDC00));
59                         return 0;
60                 }
61                 return 1;
62         } else
63         if(r1 >= 0xDC00 && r1 <= 0xDFFF){
64                 if(r2 >= 0xD800 && r2 <= 0xDBFF){
65                         *rp = 0x10000 + (((r2 - 0xD800)<<10) | (r1 - 0xDC00));
66                         return 0;
67                 }
68                 return 1;
69         }
70         return 0;
71 }
72
73 static int
74 lex(Lex *l)
75 {
76         Rune r, r2;
77         char *t;
78         int i;
79         char c;
80
81         for(;;){
82                 r = peekch(l);
83                 if(r != 0x20 && r != 0x09 && r != 0x0A && r != 0x0D)
84                         break;
85                 getch(l);
86         }
87         r = getch(l);
88         if(r == ']' && l->canjmp)
89                 longjmp(l->jmp, 1);
90         l->canjmp = 0;
91         if(r == 0 || r == '{' || r == '[' || r == ']' || r == '}' || r == ':' || r == ','){
92                 l->t = r;
93                 return 0;
94         }
95         if(r >= 0x80 || isalpha(r)){
96                 t = l->buf;
97                 for(;;){
98                         t += runetochar(t, &r);
99                         if(t >= l->buf + sizeof(l->buf)){
100                                 werrstr("json: literal too long");
101                                 return -1;
102                         }
103                         r = peekch(l);
104                         if(r < 0x80 && !isalpha(r))
105                                 break;
106                         getch(l);
107                 }
108                 *t = 0;
109                 if(strcmp(l->buf, "true") == 0)
110                         l->t = TTRUE;
111                 else if(strcmp(l->buf, "false") == 0)
112                         l->t = TFALSE;
113                 else if(strcmp(l->buf, "null") == 0)
114                         l->t = TNULL;
115                 else{
116                         werrstr("json: invalid literal");
117                         return -1;
118                 }
119                 return 0;
120         }
121         if(isdigit(r) || r == '-'){
122                 l->n = strtod(l->s-1, &l->s);
123                 l->t = TNUM;
124                 return 0;
125         }
126         if(r == '"'){
127                 r2 = 0;
128                 t = l->buf;
129                 for(;;){
130                         r = getch(l);
131                         if(r == '"')
132                                 break;
133                         if(r < ' '){
134                                 werrstr("json: invalid char in string %x", r);
135                                 return -1;
136                         }
137                         if(r == '\\'){
138                                 r = getch(l);
139                                 switch(r){
140                                 case 'n':
141                                         r = '\n';
142                                         break;
143                                 case 'r':
144                                         r = '\r';
145                                         break;
146                                 case 'u':
147                                         r = 0;
148                                         for(i = 0; i < 4; i++){
149                                                 if(!isxdigit(peekch(l)))
150                                                         break;
151
152                                                 c = getch(l);
153                                                 r *= 16;
154                                                 if(c >= '0' && c <= '9')
155                                                         r += c - '0';
156                                                 if(c >= 'a' && c <= 'f')
157                                                         r += c - 'a' + 10;
158                                                 else if(c >= 'A' && c <= 'F')
159                                                         r += c - 'A' + 10;
160                                         }
161                                         if(fixsurrogate(&r, r2)){
162                                                 r2 = r;
163                                                 continue;
164                                         }
165                                         break;
166                                 case 't':
167                                         r = '\t';
168                                         break;
169                                 case 'f':
170                                         r = '\f';
171                                         break;
172                                 case 'b':
173                                         r = '\b';
174                                         break;
175                                 case '"': case '/': case '\\':
176                                         break;
177                                 default:
178                                         werrstr("json: invalid escape sequence \\%C", r);
179                                         return -1;
180                                 }
181                         }
182                         r2 = 0;
183                         t += runetochar(t, &r);
184                         if(t >= l->buf + sizeof(l->buf)){
185                                 werrstr("json: string too long");
186                                 return -1;
187                         }
188                 }
189                 *t = 0;
190                 l->t = TSTRING;
191                 return 0;
192         }
193         werrstr("json: invalid char %C", peekch(l));
194         return -1;
195 }
196
197 static JSON*
198 jsonobj(Lex *l)
199 {
200         JSON *j;
201         JSONEl *e;
202         JSONEl **ln;
203         int obj;
204         
205         j = mallocz(sizeof(*j), 1);
206         if(j == nil)
207                 return nil;
208         if(lex(l) < 0){
209 error:
210                 free(j);
211                 return nil;
212         }
213         switch(l->t){
214         case TEOF:
215                 werrstr("json: unexpected eof");
216                 goto error;
217         case TNULL:
218                 j->t = JSONNull;
219                 break;
220         case TTRUE:
221                 j->t = JSONBool;
222                 j->n = 1;
223                 break;
224         case TFALSE:
225                 j->t = JSONBool;
226                 j->n = 0;
227                 break;
228         case TSTRING:
229                 j->t = JSONString;
230                 j->s = strdup(l->buf);
231                 if(j->s == nil)
232                         goto error;
233                 break;
234         case TNUM:
235                 j->t = JSONNumber;
236                 j->n = l->n;
237                 break;
238         case '{':
239         case '[':
240                 obj = l->t == '{';
241                 ln = &j->first;
242                 e = nil;
243                 if(obj){
244                         j->t = JSONObject;
245                         if(lex(l) < 0)
246                                 goto abort;
247                         if(l->t == '}')
248                                 return j;
249                         goto firstobj;
250                 }else{
251                         j->t = JSONArray;
252                         l->canjmp = 1;
253                         if(setjmp(l->jmp) > 0){
254                                 free(e);
255                                 return j;
256                         }
257                 }
258                 for(;;){
259                         if(obj){
260                                 if(lex(l) < 0)
261                                         goto abort;
262                         firstobj:
263                                 if(l->t != TSTRING){
264                                         werrstr("json: syntax error, not string");
265                                         goto abort;
266                                 }
267                                 e = mallocz(sizeof(*e), 1);
268                                 if(e == nil)
269                                         goto abort;
270                                 e->name = strdup(l->buf);
271                                 if(e->name == nil || lex(l) < 0){
272                                         free(e);
273                                         goto abort;
274                                 }
275                                 if(l->t != ':'){
276                                         werrstr("json: syntax error, not colon");
277                                         free(e);
278                                         goto abort;
279                                 }
280                         }else{
281                                 e = mallocz(sizeof(*e), 1);
282                                 if(e == nil)
283                                         goto abort;
284                         }
285                         e->val = jsonobj(l);
286                         if(e->val == nil){
287                                 free(e);
288                                 goto abort;
289                         }
290                         *ln = e;
291                         ln = &e->next;
292                         if(lex(l) < 0)
293                                 goto abort;
294                         if(l->t == (obj ? '}' : ']'))
295                                 break;
296                         if(l->t != ','){
297                                 werrstr("json: syntax error, neither comma nor ending paren");
298                                 goto abort;
299                         }
300                 }
301                 break;
302         abort:
303                 jsonfree(j);
304                 return nil;
305         case ']': case '}': case ',': case ':':
306                 werrstr("json: unexpected %C", l->t);
307                 goto error;
308         default:
309                 werrstr("json: the front fell off");
310                 goto error;
311         }
312         return j;
313 }
314
315 JSON*
316 jsonparse(char *s)
317 {
318         Lex l;
319
320         memset(&l, 0, sizeof(l));
321         l.s = s;
322         return jsonobj(&l);
323 }
324
325 void
326 jsonfree(JSON *j)
327 {
328         JSONEl *e, *f;
329
330         switch(j->t){
331         case JSONString:
332                 if(j->s)
333                         free(j->s);
334                 break;
335         case JSONArray: case JSONObject:
336                 for(e = j->first; e != nil; e = f){
337                         if(e->name)
338                                 free(e->name);
339                         jsonfree(e->val);
340                         f = e->next;
341                         free(e);
342                 }
343         }
344         free(j);
345 }
346
347 JSON *
348 jsonbyname(JSON *j, char *n)
349 {
350         JSONEl *e;
351         
352         if(j->t != JSONObject){
353                 werrstr("not an object");
354                 return nil;
355         }
356         for(e = j->first; e != nil; e = e->next)
357                 if(strcmp(e->name, n) == 0)
358                         return e->val;
359         werrstr("key '%s' not found", n);
360         return nil;
361 }
362
363 char *
364 jsonstr(JSON *j)
365 {
366         if(j == nil)
367                 return nil;
368         if(j->t != JSONString){
369                 werrstr("not a string");
370                 return nil;
371         }
372         return j->s;
373 }