fixed bool parsing bug -- off by one on the string size enforcement
[OpenSRF.git] / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
19 /* Length of the JSON string currently being parsed, cached so it does not
20  * have to be recalculated in each function.  json_parse_string() writes it and json_handle_error() reads it for its report; the parse helpers receive the length through a same-named parameter instead.
21  */
22 int current_strlen; /* XXX need to move this into the function params for thread support */
23
24
25 jsonObject* jsonParseString( char* string) {
26         return json_parse_string( string );
27 }
28
29 jsonObject* jsonParseStringFmt( char* string, ... ) {
30         VA_LIST_TO_STRING(string);
31         return json_parse_string( VA_BUF );
32 }
33
34
35
36 //jsonObject* (*jsonParseString) (char* str) = &_jsonParseString;
37
38 jsonObject* json_parse_string(char* string) {
39
40         if(string == NULL) return NULL;
41
42         current_strlen = strlen(string);
43
44         if(current_strlen == 0) 
45                 return NULL;
46
47         unsigned long index = 0;
48
49         json_eat_ws(string, &index, 1, current_strlen); /* remove leading whitespace */
50         if(index == current_strlen) return NULL;
51
52         jsonObject* obj = jsonNewObject(NULL);
53
54         int status = _json_parse_string(string, &index, obj, current_strlen);
55         if(!status) return obj;
56
57         if(status == -2) {
58                 jsonObjectFree(obj);
59                 return NULL;
60         }
61
62         return NULL;
63 }
64
65
66 int _json_parse_string(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
67         if( !string || !index || *index >= current_strlen) return -2;
68
69         int status = 0; /* return code from parsing routines */
70         char* classname = NULL; /* object class hint */
71         json_eat_ws(string, index, 1, current_strlen); /* remove leading whitespace */
72
73         char c = string[*index];
74
75         /* remove any leading comments */
76         if( c == '/' ) { 
77
78                 while(1) {
79                         (*index)++; /* move to second comment char */
80                         status = json_eat_comment(string, index, &classname, 1, current_strlen);
81                         if(status) return status;
82
83                         json_eat_ws(string, index, 1, current_strlen);
84                         c = string[*index];
85                         if(c != '/')
86                                 break;
87                 }
88         }
89
90         json_eat_ws(string, index, 1, current_strlen); /* remove leading whitespace */
91
92         if(*index >= current_strlen)
93                 return -2;
94
95         switch(c) {
96                                 
97                 /* json string */
98                 case '"': 
99                         (*index)++;
100                         status = json_parse_json_string(string, index, obj, current_strlen); break;
101
102                 /* json array */
103                 case '[':
104                         (*index)++;
105                         status = json_parse_json_array(string, index, obj, current_strlen);                     
106                         break;
107
108                 /* json object */
109                 case '{':
110                         (*index)++;
111                         status = json_parse_json_object(string, index, obj, current_strlen);
112                         break;
113
114                 /* NULL */
115                 case 'n':
116                 case 'N':
117                         status = json_parse_json_null(string, index, obj, current_strlen);
118                         break;
119                         
120
121                 /* true, false */
122                 case 'f':
123                 case 'F':
124                 case 't':
125                 case 'T':
126                         status = json_parse_json_bool(string, index, obj, current_strlen);
127                         break;
128
129                 default:
130                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
131                                 status = json_parse_json_number(string, index, obj, current_strlen);    
132                                 if(status) return status;
133                                 break;
134                         }
135
136                         (*index)--;
137                         /* we should never get here */
138                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
139         }       
140
141         if(status) return status;
142
143         json_eat_ws(string, index, 1, current_strlen);
144
145         if( *index < current_strlen ) {
146                 /* remove any trailing comments */
147                 c = string[*index];
148                 if( c == '/' ) { 
149                         (*index)++;
150                         status = json_eat_comment(string, index, NULL, 0, current_strlen);
151                         if(status) return status;
152                 }
153         }
154
155         if(classname){
156                 jsonObjectSetClass(obj, classname);
157                 free(classname);
158         }
159
160         return 0;
161 }
162
163
164 int json_parse_json_null(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
165
166         if(*index >= (current_strlen - 3)) {
167                 return json_handle_error(string, index, 
168                         "_parse_json_null(): invalid null" );
169         }
170
171         if(!strncasecmp(string + (*index), "null", 4)) {
172                 (*index) += 4;
173                 obj->type = JSON_NULL;
174                 return 0;
175         } else {
176                 return json_handle_error(string, index,
177                         "_parse_json_null(): invalid null" );
178         }
179 }
180
181 /* should be at the first character of the bool at this point */
182 int json_parse_json_bool(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
183         if( ! string || ! obj || *index >= current_strlen ) return -1;
184
185         char* ret = "json_parse_json_bool(): truncated bool";
186
187         if( *index > (current_strlen - 4))
188                 return json_handle_error(string, index, ret);
189
190         if(!strncasecmp( string + (*index), "true", 4)) {
191                 (*index) += 4;
192                 obj->value.b = 1;
193                 obj->type = JSON_BOOL;
194                 return 0;
195         }
196
197         if( *index > (current_strlen - 5))
198                 return json_handle_error(string, index, ret);
199         
200         if(!strncasecmp( string + (*index), "false", 5)) {
201                 (*index) += 5;
202                 obj->value.b = 0;
203                 obj->type = JSON_BOOL;
204                 return 0;
205         }
206
207         return json_handle_error(string, index, ret);
208 }
209
210
211 /* expecting the first character of the number */
212 int json_parse_json_number(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
213         if( ! string || ! obj || *index >= current_strlen ) return -1;
214
215         growing_buffer* buf = buffer_init(64);
216         char c = string[*index];
217
218         int done = 0;
219         int dot_seen = 0;
220
221         /* negative number? */
222         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
223
224         c = string[*index];
225
226         while(*index < current_strlen) {
227
228                 if(is_number(c)) {
229                         buffer_add_char(buf, c);
230                 }
231
232                 else if( c == '.' ) {
233                         if(dot_seen) {
234                                 buffer_free(buf);
235                                 return json_handle_error(string, index, 
236                                         "json_parse_json_number(): malformed json number");
237                         }
238                         dot_seen = 1;
239                         buffer_add_char(buf, c);
240                 } else {
241                         done = 1; break;
242                 }
243
244                 (*index)++;
245                 c = string[*index];
246                 if(done) break;
247         }
248
249         obj->type = JSON_NUMBER;
250         obj->value.n = strtod(buf->buf, NULL);
251         buffer_free(buf);
252         return 0;
253 }
254
255 /* index should point to the character directly following the '['.  when done
256  * index will point to the character directly following the ']' character
257  */
258 int json_parse_json_array(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
259
260         if( ! string || ! obj || ! index || *index >= current_strlen ) return -1;
261
262         int status = 0;
263         int in_parse = 0; /* true if this array already contains one item */
264         obj->type = JSON_ARRAY;
265         int set = 0;
266         int done = 0;
267
268         while(*index < current_strlen) {
269
270                 json_eat_ws(string, index, 1, current_strlen);
271
272                 if(string[*index] == ']') {
273                         (*index)++;
274                         done = 1;
275                         break;
276                 }
277
278                 if(in_parse) {
279                         json_eat_ws(string, index, 1, current_strlen);
280                         if(string[*index] != ',') {
281                                 return json_handle_error(string, index,
282                                         "json_parse_json_array(): array item not followed by a ','");
283                         }
284                         (*index)++;
285                         json_eat_ws(string, index, 1, current_strlen);
286                 }
287
288                 jsonObject* item = jsonNewObject(NULL);
289
290                 #ifndef STRICT_JSON_READ
291                 if(*index < current_strlen) {
292                         if(string[*index] == ',' || string[*index] == ']') {
293                                 status = 0;
294                                 set = 1;
295                         }
296                 }
297                 if(!set) status = _json_parse_string(string, index, item, current_strlen);
298
299                 #else
300                 status = _json_parse_string(string, index, item, current_strlen);
301                 #endif
302
303                 if(status) { jsonObjectFree(item); return status; }
304                 jsonObjectPush(obj, item);
305                 in_parse = 1;
306                 set = 0;
307         }
308
309         if(!done)
310                 return json_handle_error(string, index,
311                         "json_parse_json_array(): array not closed");
312
313         return 0;
314 }
315
316
317 /* index should point to the character directly following the '{'.  when done
318  * index will point to the character directly following the '}'
319  */
320 int json_parse_json_object(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
321         if( ! string || !obj || ! index || *index >= current_strlen ) return -1;
322
323         obj->type = JSON_HASH;
324         int status;
325         int in_parse = 0; /* true if we've already added one item to this object */
326         int set = 0;
327         int done = 0;
328
329         while(*index < current_strlen) {
330
331                 json_eat_ws(string, index, 1, current_strlen);
332
333                 if(string[*index] == '}') {
334                         (*index)++;
335                         done = 1;
336                         break;
337                 }
338
339                 if(in_parse) {
340                         if(string[*index] != ',') {
341                                 return json_handle_error(string, index,
342                                         "json_parse_json_object(): object missing ',' between elements" );
343                         }
344                         (*index)++;
345                         json_eat_ws(string, index, 1, current_strlen);
346                 }
347
348                 /* first we grab the hash key */
349                 jsonObject* key_obj = jsonNewObject(NULL);
350                 status = _json_parse_string(string, index, key_obj, current_strlen);
351                 if(status) return status;
352
353                 if(key_obj->type != JSON_STRING) {
354                         return json_handle_error(string, index, 
355                                 "_json_parse_json_object(): hash key not a string");
356                 }
357
358                 char* key = key_obj->value.s;
359
360                 json_eat_ws(string, index, 1, current_strlen);
361
362                 if(string[*index] != ':') {
363                         return json_handle_error(string, index, 
364                                 "json_parse_json_object(): hash key not followed by ':' character");
365                 }
366
367                 (*index)++;
368
369                 /* now grab the value object */
370                 json_eat_ws(string, index, 1, current_strlen);
371                 jsonObject* value_obj = jsonNewObject(NULL);
372
373 #ifndef STRICT_JSON_READ
374                 if(*index < current_strlen) {
375                         if(string[*index] == ',' || string[*index] == '}') {
376                                 status = 0;
377                                 set = 1;
378                         }
379                 }
380                 if(!set)
381                         status = _json_parse_string(string, index, value_obj, current_strlen);
382
383 #else
384                  status = _json_parse_string(string, index, value_obj, current_strlen);
385 #endif
386
387                 if(status) return status;
388
389                 /* put the data into the object and continue */
390                 jsonObjectSetKey(obj, key, value_obj);
391                 jsonObjectFree(key_obj);
392                 in_parse = 1;
393                 set = 0;
394         }
395
396         if(!done)
397                 return json_handle_error(string, index,
398                         "json_parse_json_object(): object not closed");
399
400         return 0;
401 }
402
403
404
405 /* when done, index will point to the character after the closing quote */
406 int json_parse_json_string(char* string, unsigned long* index, jsonObject* obj, int current_strlen) {
407         if( ! string || ! index || *index >= current_strlen ) return -1;
408
409         int in_escape = 0;      
410         int done = 0;
411         growing_buffer* buf = buffer_init(64);
412
413         while(*index < current_strlen) {
414
415                 char c = string[*index]; 
416
417                 switch(c) {
418
419                         case '\\':
420                                 if(in_escape) {
421                                         buffer_add(buf, "\\");
422                                         in_escape = 0;
423                                 } else 
424                                         in_escape = 1;
425                                 break;
426
427                         case '"':
428                                 if(in_escape) {
429                                         buffer_add(buf, "\"");
430                                         in_escape = 0;
431                                 } else 
432                                         done = 1;
433                                 break;
434
435                         case 't':
436                                 if(in_escape) {
437                                         buffer_add(buf,"\t");
438                                         in_escape = 0;
439                                 } else 
440                                         buffer_add_char(buf, c);
441                                 break;
442
443                         case 'b':
444                                 if(in_escape) {
445                                         buffer_add(buf,"\b");
446                                         in_escape = 0;
447                                 } else 
448                                         buffer_add_char(buf, c);
449                                 break;
450
451                         case 'f':
452                                 if(in_escape) {
453                                         buffer_add(buf,"\f");
454                                         in_escape = 0;
455                                 } else 
456                                         buffer_add_char(buf, c);
457                                 break;
458
459                         case 'r':
460                                 if(in_escape) {
461                                         buffer_add(buf,"\r");
462                                         in_escape = 0;
463                                 } else 
464                                         buffer_add_char(buf, c);
465                                 break;
466
467                         case 'n':
468                                 if(in_escape) {
469                                         buffer_add(buf,"\n");
470                                         in_escape = 0;
471                                 } else 
472                                         buffer_add_char(buf, c);
473                                 break;
474
475                         case 'u':
476                                 if(in_escape) {
477                                         (*index)++;
478
479                                         if(*index >= (current_strlen - 4)) {
480                                                 buffer_free(buf);
481                                                 return json_handle_error(string, index,
482                                                         "json_parse_json_string(): truncated escaped unicode"); }
483
484                                         char buff[5];
485                                         memset(buff,0,5);
486                                         memcpy(buff, string + (*index), 4);
487
488
489                                         /* ----------------------------------------------------------------------- */
490                                         /* ----------------------------------------------------------------------- */
491                                         /* The following chunk was borrowed with permission from 
492                                                 json-c http://oss.metaparadigm.com/json-c/ */
493                                         unsigned char utf_out[4];
494                                         memset(utf_out,0,4);
495
496                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
497
498                                         unsigned int ucs_char =
499                                                 (hexdigit(string[*index] ) << 12) +
500                                                 (hexdigit(string[*index + 1]) << 8) +
501                                                 (hexdigit(string[*index + 2]) << 4) +
502                                                 hexdigit(string[*index + 3]);
503         
504                                         if (ucs_char < 0x80) {
505                                                 utf_out[0] = ucs_char;
506                                                 buffer_add(buf, utf_out);
507
508                                         } else if (ucs_char < 0x800) {
509                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
510                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
511                                                 buffer_add(buf, utf_out);
512
513                                         } else {
514                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
515                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
516                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
517                                                 buffer_add(buf, utf_out);
518                                         }
519                                         /* ----------------------------------------------------------------------- */
520                                         /* ----------------------------------------------------------------------- */
521
522                                         (*index) += 3;
523                                         in_escape = 0;
524
525                                 } else {
526
527                                         buffer_add_char(buf, c);
528                                 }
529
530                                 break;
531
532                         default:
533                                 buffer_add_char(buf, c);
534                 }
535
536                 (*index)++;
537                 if(done) break;
538         }
539
540         jsonObjectSetString(obj, buf->buf);
541         buffer_free(buf);
542         return 0;
543 }
544
545
/*
 * Advances *index past whitespace in string, never beyond current_strlen.
 *
 * With eat_all set, space, newline, tab and carriage return are skipped;
 * otherwise only spaces are.  Does nothing when string or index is NULL or
 * when *index is already at/after current_strlen.
 */
void json_eat_ws(char* string, unsigned long* index, int eat_all, int current_strlen) {
	if( ! string || ! index || current_strlen <= 0 ) return;

	unsigned long end = (unsigned long) current_strlen;

	while(*index < end) {
		char c = string[*index];

		int is_ws = (c == ' ');
		if(!is_ws && eat_all) /* removes newlines, etc */
			is_ws = (c == '\n' || c == '\t' || c == '\r');

		if(!is_ws) break;
		(*index)++;
	}
}
561
562
563 /* index should be at the '*' character at the beginning of the comment.
564  * when done, index will point to the first character after the final /
565  */
566 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class, int current_strlen) {
567         if( ! string || ! index || *index >= current_strlen ) return -1;
568         
569
570         if(string[*index] != '*' && string[*index] != '/' )
571                 return json_handle_error(string, index, 
572                         "json_eat_comment(): invalid character after /");
573
574         /* chop out any // style comments */
575         if(string[*index] == '/') {
576                 (*index)++;
577                 char c = string[*index];
578                 while(*index < current_strlen) {
579                         (*index)++;
580                         if(c == '\n') 
581                                 return 0;
582                         c = string[*index];
583                 }
584                 return 0;
585         }
586
587         (*index)++;
588
589         int on_star                     = 0; /* true if we just saw a '*' character */
590
591         /* we're just past the '*' */
592         if(!parse_class) { /* we're not concerned with class hints */
593                 while(*index < current_strlen) {
594                         if(string[*index] == '/') {
595                                 if(on_star) {
596                                         (*index)++;
597                                         return 0;
598                                 }
599                         }
600
601                         if(string[*index] == '*') on_star = 1;
602                         else on_star = 0;
603
604                         (*index)++;
605                 }
606                 return 0;
607         }
608
609
610
611         growing_buffer* buf = buffer_init(64);
612
613         int first_dash          = 0;
614         int second_dash = 0;
615         int third_dash          = 0;
616         int fourth_dash = 0;
617
618         int in_hint                     = 0;
619         int done                                = 0;
620
621         /*--S hint--*/   /* <-- Hints  look like this */
622         /*--E hint--*/
623
624         while(*index < current_strlen) {
625                 char c = string[*index];
626
627                 switch(c) {
628
629                         case '-':
630                                 on_star = 0;
631                                 if(third_dash)                  fourth_dash = 1;
632                                 else if(in_hint)                third_dash      = 1;
633                                 else if(first_dash)     second_dash = 1;
634                                 else                                            first_dash = 1;
635                                 break;
636
637                         case 'S':
638                                 on_star = 0;
639                                 if(second_dash && !in_hint) {
640                                         (*index)++;
641                                         json_eat_ws(string, index, 1, current_strlen);
642                                         (*index)--; /* this will get incremented at the bottom of the loop */
643                                         in_hint = 1;
644                                         break;
645                                 } 
646
647                                 if(second_dash && in_hint) {
648                                         buffer_add_char(buf, c);
649                                         break;
650                                 }
651
652                         case 'E':
653                                 on_star = 0;
654                                 if(second_dash && !in_hint) {
655                                         (*index)++;
656                                         json_eat_ws(string, index, 1, current_strlen);
657                                         (*index)--; /* this will get incremented at the bottom of the loop */
658                                         in_hint = 1;
659                                         break;
660                                 }
661
662                                 if(second_dash && in_hint) {
663                                         buffer_add_char(buf, c);
664                                         break;
665                                 }
666
667                         case '*':
668                                 on_star = 1;
669                                 break;
670
671                         case '/':
672                                 if(on_star) 
673                                         done = 1;
674                                 else
675                                 on_star = 0;
676                                 break;
677
678                         default:
679                                 on_star = 0;
680                                 if(in_hint)
681                                         buffer_add_char(buf, c);
682                 }
683
684                 (*index)++;
685                 if(done) break;
686         }
687
688         if( buf->n_used > 0 && buffer)
689                 *buffer = buffer_data(buf);
690
691         buffer_free(buf);
692         return 0;
693 }
694
/* Returns 1 if c is one of the decimal digits '0' through '9', 0 otherwise. */
int is_number(char c) {
	/* the C standard guarantees that '0'..'9' are consecutive */
	if(c < '0' || c > '9')
		return 0;
	return 1;
}
711
712 int json_handle_error(char* string, unsigned long* index, char* err_msg) {
713
714         char buf[60];
715         memset(buf, 0, 60);
716
717         if(*index > 30)
718                 strncpy( buf, string + (*index - 30), 59 );
719         else
720                 strncpy( buf, string, 59 );
721
722         fprintf(stderr, 
723                         "\nError parsing json string at charracter %c "
724                         "(code %d) and index %ld\nString length: %d\nMsg:\t%s\nNear:\t%s\nFull String:\t%s\n", 
725                         string[*index], string[*index], *index, current_strlen, err_msg, buf, string );
726
727         return -1;
728 }
729
730
731 jsonObject* jsonParseFile( const char* filename ) {
732         return json_parse_file( filename );
733 }
734         
735 jsonObject* json_parse_file(const char* filename) {
736         if(!filename) return NULL;
737         char* data = file_to_string(filename);
738         jsonObject* o = json_parse_string(data);
739         free(data);
740         return o;
741 }
742
743
744
745