69fb77948e5ce935f558fd6b857b05d6f2e54c32
[Evergreen.git] / OpenSRF / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
/* Length of the JSON string currently being parsed.  json_parse_string()
 * stores strlen() of its input here so that the parsing helpers can
 * bounds-check their index without recomputing the length on every call.
 */
int current_strlen; /* XXX need to move this into the function params for thread support */
23
24
25 jsonObject* jsonParseString( char* string, ... ) {
26         VA_LIST_TO_STRING(string);
27         return json_parse_string( VA_BUF );
28 }
29
30 //jsonObject* (*jsonParseString) (char* str) = &_jsonParseString;
31
32 jsonObject* json_parse_string(char* string) {
33
34         if(string == NULL) return NULL;
35
36         current_strlen = strlen(string);
37
38         if(current_strlen == 0) 
39                 return NULL;
40
41         unsigned long index = 0;
42
43         json_eat_ws(string, &index, 1); /* remove leading whitespace */
44         if(index == current_strlen) return NULL;
45
46         jsonObject* obj = jsonNewObject(NULL);
47
48         int status = _json_parse_string(string, &index, obj);
49         if(!status) return obj;
50
51         if(status == -2) {
52                 jsonObjectFree(obj);
53                 return NULL;
54         }
55
56         return NULL;
57 }
58
59
60 int _json_parse_string(char* string, unsigned long* index, jsonObject* obj) {
61         if( !string || !index || *index >= current_strlen) return -2;
62
63         int status = 0; /* return code from parsing routines */
64         char* classname = NULL; /* object class hint */
65         json_eat_ws(string, index, 1); /* remove leading whitespace */
66
67         char c = string[*index];
68
69         /* remove any leading comments */
70         if( c == '/' ) { 
71
72                 while(1) {
73                         (*index)++; /* move to second comment char */
74                         status = json_eat_comment(string, index, &classname, 1);
75                         if(status) return status;
76
77                         json_eat_ws(string, index, 1);
78                         c = string[*index];
79                         if(c != '/')
80                                 break;
81                 }
82         }
83
84         json_eat_ws(string, index, 1); /* remove leading whitespace */
85
86         if(*index >= current_strlen)
87                 return -2;
88
89         switch(c) {
90                                 
91                 /* json string */
92                 case '"': 
93                         (*index)++;
94                         status = json_parse_json_string(string, index, obj);
95                         break;
96
97                 /* json array */
98                 case '[':
99                         (*index)++;
100                         status = json_parse_json_array(string, index, obj);                     
101                         break;
102
103                 /* json object */
104                 case '{':
105                         (*index)++;
106                         status = json_parse_json_object(string, index, obj);
107                         break;
108
109                 /* NULL */
110                 case 'n':
111                 case 'N':
112                         status = json_parse_json_null(string, index, obj);
113                         break;
114                         
115
116                 /* true, false */
117                 case 'f':
118                 case 'F':
119                 case 't':
120                 case 'T':
121                         status = json_parse_json_bool(string, index, obj);
122                         break;
123
124                 default:
125                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
126                                 status = json_parse_json_number(string, index, obj);    
127                                 if(status) return status;
128                                 break;
129                         }
130
131                         (*index)--;
132                         /* we should never get here */
133                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
134         }       
135
136         if(status) return status;
137
138         json_eat_ws(string, index, 1);
139
140         if( *index < current_strlen ) {
141                 /* remove any trailing comments */
142                 c = string[*index];
143                 if( c == '/' ) { 
144                         (*index)++;
145                         status = json_eat_comment(string, index, NULL, 0);
146                         if(status) return status;
147                 }
148         }
149
150         if(classname){
151                 jsonObjectSetClass(obj, classname);
152                 free(classname);
153         }
154
155         return 0;
156 }
157
158
159 int json_parse_json_null(char* string, unsigned long* index, jsonObject* obj) {
160
161         if(*index >= (current_strlen - 3)) {
162                 return json_handle_error(string, index, 
163                         "_parse_json_string(): invalid null" );
164         }
165
166         if(!strncasecmp(string + (*index), "null", 4)) {
167                 (*index) += 4;
168                 obj->type = JSON_NULL;
169                 return 0;
170         } else {
171                 return json_handle_error(string, index,
172                         "_parse_json_string(): invalid null" );
173         }
174 }
175
176 /* should be at the first character of the bool at this point */
177 int json_parse_json_bool(char* string, unsigned long* index, jsonObject* obj) {
178         if( ! string || ! obj || *index >= current_strlen ) return -1;
179
180         char* ret = "json_parse_json_bool(): truncated bool";
181
182         if( *index >= (current_strlen - 5))
183                 return json_handle_error(string, index, ret);
184         
185         if(!strncasecmp( string + (*index), "false", 5)) {
186                 (*index) += 5;
187                 obj->value.b = 0;
188                 obj->type = JSON_BOOL;
189                 return 0;
190         }
191
192         if( *index >= (current_strlen - 4))
193                 return json_handle_error(string, index, ret);
194
195         if(!strncasecmp( string + (*index), "true", 4)) {
196                 (*index) += 4;
197                 obj->value.b = 1;
198                 obj->type = JSON_BOOL;
199                 return 0;
200         }
201
202         return json_handle_error(string, index, ret);
203 }
204
205
206 /* expecting the first character of the number */
207 int json_parse_json_number(char* string, unsigned long* index, jsonObject* obj) {
208         if( ! string || ! obj || *index >= current_strlen ) return -1;
209
210         growing_buffer* buf = buffer_init(64);
211         char c = string[*index];
212
213         int done = 0;
214         int dot_seen = 0;
215
216         /* negative number? */
217         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
218
219         c = string[*index];
220
221         while(*index < current_strlen) {
222
223                 if(is_number(c)) {
224                         buffer_add_char(buf, c);
225                 }
226
227                 else if( c == '.' ) {
228                         if(dot_seen) {
229                                 buffer_free(buf);
230                                 return json_handle_error(string, index, 
231                                         "json_parse_json_number(): malformed json number");
232                         }
233                         dot_seen = 1;
234                         buffer_add_char(buf, c);
235                 } else {
236                         done = 1; break;
237                 }
238
239                 (*index)++;
240                 c = string[*index];
241                 if(done) break;
242         }
243
244         obj->type = JSON_NUMBER;
245         obj->value.n = strtod(buf->buf, NULL);
246         buffer_free(buf);
247         return 0;
248 }
249
250 /* index should point to the character directly following the '['.  when done
251  * index will point to the character directly following the ']' character
252  */
253 int json_parse_json_array(char* string, unsigned long* index, jsonObject* obj) {
254
255         if( ! string || ! obj || ! index || *index >= current_strlen ) return -1;
256
257         int status = 0;
258         int in_parse = 0; /* true if this array already contains one item */
259         obj->type = JSON_ARRAY;
260         int set = 0;
261         int done = 0;
262
263         while(*index < current_strlen) {
264
265                 json_eat_ws(string, index, 1);
266
267                 if(string[*index] == ']') {
268                         (*index)++;
269                         done = 1;
270                         break;
271                 }
272
273                 if(in_parse) {
274                         json_eat_ws(string, index, 1);
275                         if(string[*index] != ',') {
276                                 return json_handle_error(string, index,
277                                         "json_parse_json_array(): array item not followed by a ','");
278                         }
279                         (*index)++;
280                         json_eat_ws(string, index, 1);
281                 }
282
283                 jsonObject* item = jsonNewObject(NULL);
284
285                 #ifndef STRICT_JSON_READ
286                 if(*index < current_strlen) {
287                         if(string[*index] == ',' || string[*index] == ']') {
288                                 status = 0;
289                                 set = 1;
290                         }
291                 }
292                 if(!set) status = _json_parse_string(string, index, item);
293
294                 #else
295                 status = _json_parse_string(string, index, item);
296                 #endif
297
298                 if(status) { jsonObjectFree(item); return status; }
299                 jsonObjectPush(obj, item);
300                 in_parse = 1;
301                 set = 0;
302         }
303
304         if(!done)
305                 return json_handle_error(string, index,
306                         "json_parse_json_array(): array not closed");
307
308         return 0;
309 }
310
311
312 /* index should point to the character directly following the '{'.  when done
313  * index will point to the character directly following the '}'
314  */
315 int json_parse_json_object(char* string, unsigned long* index, jsonObject* obj) {
316         if( ! string || !obj || ! index || *index >= current_strlen ) return -1;
317
318         obj->type = JSON_HASH;
319         int status;
320         int in_parse = 0; /* true if we've already added one item to this object */
321         int set = 0;
322         int done = 0;
323
324         while(*index < current_strlen) {
325
326                 json_eat_ws(string, index, 1);
327
328                 if(string[*index] == '}') {
329                         (*index)++;
330                         done = 1;
331                         break;
332                 }
333
334                 if(in_parse) {
335                         if(string[*index] != ',') {
336                                 return json_handle_error(string, index,
337                                         "json_parse_json_object(): object missing ',' between elements" );
338                         }
339                         (*index)++;
340                         json_eat_ws(string, index, 1);
341                 }
342
343                 /* first we grab the hash key */
344                 jsonObject* key_obj = jsonNewObject(NULL);
345                 status = _json_parse_string(string, index, key_obj);
346                 if(status) return status;
347
348                 if(key_obj->type != JSON_STRING) {
349                         return json_handle_error(string, index, 
350                                 "_json_parse_json_object(): hash key not a string");
351                 }
352
353                 char* key = key_obj->value.s;
354
355                 json_eat_ws(string, index, 1);
356
357                 if(string[*index] != ':') {
358                         return json_handle_error(string, index, 
359                                 "json_parse_json_object(): hash key not followed by ':' character");
360                 }
361
362                 (*index)++;
363
364                 /* now grab the value object */
365                 json_eat_ws(string, index, 1);
366                 jsonObject* value_obj = jsonNewObject(NULL);
367
368 #ifndef STRICT_JSON_READ
369                 if(*index < current_strlen) {
370                         if(string[*index] == ',' || string[*index] == '}') {
371                                 status = 0;
372                                 set = 1;
373                         }
374                 }
375                 if(!set)
376                         status = _json_parse_string(string, index, value_obj);
377
378 #else
379                  status = _json_parse_string(string, index, value_obj);
380 #endif
381
382                 if(status) return status;
383
384                 /* put the data into the object and continue */
385                 jsonObjectSetKey(obj, key, value_obj);
386                 jsonObjectFree(key_obj);
387                 in_parse = 1;
388                 set = 0;
389         }
390
391         if(!done)
392                 return json_handle_error(string, index,
393                         "json_parse_json_object(): object not closed");
394
395         return 0;
396 }
397
398
399
400 /* when done, index will point to the character after the closing quote */
401 int json_parse_json_string(char* string, unsigned long* index, jsonObject* obj) {
402         if( ! string || ! index || *index >= current_strlen ) return -1;
403
404         int in_escape = 0;      
405         int done = 0;
406         growing_buffer* buf = buffer_init(64);
407
408         while(*index < current_strlen) {
409
410                 char c = string[*index]; 
411
412                 switch(c) {
413
414                         case '\\':
415                                 if(in_escape) {
416                                         buffer_add(buf, "\\");
417                                         in_escape = 0;
418                                 } else 
419                                         in_escape = 1;
420                                 break;
421
422                         case '"':
423                                 if(in_escape) {
424                                         buffer_add(buf, "\"");
425                                         in_escape = 0;
426                                 } else 
427                                         done = 1;
428                                 break;
429
430                         case 't':
431                                 if(in_escape) {
432                                         buffer_add(buf,"\t");
433                                         in_escape = 0;
434                                 } else 
435                                         buffer_add_char(buf, c);
436                                 break;
437
438                         case 'b':
439                                 if(in_escape) {
440                                         buffer_add(buf,"\b");
441                                         in_escape = 0;
442                                 } else 
443                                         buffer_add_char(buf, c);
444                                 break;
445
446                         case 'f':
447                                 if(in_escape) {
448                                         buffer_add(buf,"\f");
449                                         in_escape = 0;
450                                 } else 
451                                         buffer_add_char(buf, c);
452                                 break;
453
454                         case 'r':
455                                 if(in_escape) {
456                                         buffer_add(buf,"\r");
457                                         in_escape = 0;
458                                 } else 
459                                         buffer_add_char(buf, c);
460                                 break;
461
462                         case 'n':
463                                 if(in_escape) {
464                                         buffer_add(buf,"\n");
465                                         in_escape = 0;
466                                 } else 
467                                         buffer_add_char(buf, c);
468                                 break;
469
470                         case 'u':
471                                 if(in_escape) {
472                                         (*index)++;
473
474                                         if(*index >= (current_strlen - 4)) {
475                                                 buffer_free(buf);
476                                                 return json_handle_error(string, index,
477                                                         "json_parse_json_string(): truncated escaped unicode"); }
478
479                                         char buff[5];
480                                         memset(buff,0,5);
481                                         memcpy(buff, string + (*index), 4);
482
483
484                                         /* ----------------------------------------------------------------------- */
485                                         /* ----------------------------------------------------------------------- */
486                                         /* The following chunk was borrowed with permission from 
487                                                 json-c http://oss.metaparadigm.com/json-c/ */
488                                         unsigned char utf_out[3];
489                                         memset(utf_out,0,3);
490
491                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
492
493                                         unsigned int ucs_char =
494                                                 (hexdigit(string[*index] ) << 12) +
495                                                 (hexdigit(string[*index + 1]) << 8) +
496                                                 (hexdigit(string[*index + 2]) << 4) +
497                                                 hexdigit(string[*index + 3]);
498         
499                                         if (ucs_char < 0x80) {
500                                                 utf_out[0] = ucs_char;
501                                                 buffer_add(buf, utf_out);
502
503                                         } else if (ucs_char < 0x800) {
504                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
505                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
506                                                 buffer_add(buf, utf_out);
507
508                                         } else {
509                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
510                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
511                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
512                                                 buffer_add(buf, utf_out);
513                                         }
514                                         /* ----------------------------------------------------------------------- */
515                                         /* ----------------------------------------------------------------------- */
516
517                                         (*index) += 3;
518                                         in_escape = 0;
519
520                                 } else {
521
522                                         buffer_add_char(buf, c);
523                                 }
524
525                                 break;
526
527                         default:
528                                 buffer_add_char(buf, c);
529                 }
530
531                 (*index)++;
532                 if(done) break;
533         }
534
535         jsonObjectSetString(obj, buf->buf);
536         buffer_free(buf);
537         return 0;
538 }
539
540
541 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
542         if( ! string || ! index ) return;
543         if(*index >= current_strlen)
544                 return;
545
546         if( eat_all ) { /* removes newlines, etc */
547                 while(string[*index] == ' '     || 
548                                 string[*index] == '\n'  ||
549                                 string[*index] == '\t') 
550                         (*index)++;
551         }
552
553         else    
554                 while(string[*index] == ' ') (*index)++;
555 }
556
557
558 /* index should be at the '*' character at the beginning of the comment.
559  * when done, index will point to the first character after the final /
560  */
561 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
562         if( ! string || ! index || *index >= current_strlen ) return -1;
563         
564
565         if(string[*index] != '*' && string[*index] != '/' )
566                 return json_handle_error(string, index, 
567                         "json_eat_comment(): invalid character after /");
568
569         /* chop out any // style comments */
570         if(string[*index] == '/') {
571                 (*index)++;
572                 char c = string[*index];
573                 while(*index < current_strlen) {
574                         (*index)++;
575                         if(c == '\n') 
576                                 return 0;
577                         c = string[*index];
578                 }
579                 return 0;
580         }
581
582         (*index)++;
583
584         int on_star                     = 0; /* true if we just saw a '*' character */
585
586         /* we're just past the '*' */
587         if(!parse_class) { /* we're not concerned with class hints */
588                 while(*index < current_strlen) {
589                         if(string[*index] == '/') {
590                                 if(on_star) {
591                                         (*index)++;
592                                         return 0;
593                                 }
594                         }
595
596                         if(string[*index] == '*') on_star = 1;
597                         else on_star = 0;
598
599                         (*index)++;
600                 }
601                 return 0;
602         }
603
604
605
606         growing_buffer* buf = buffer_init(64);
607
608         int first_dash          = 0;
609         int second_dash = 0;
610         int third_dash          = 0;
611         int fourth_dash = 0;
612
613         int in_hint                     = 0;
614         int done                                = 0;
615
616         /*--S hint--*/   /* <-- Hints  look like this */
617         /*--E hint--*/
618
619         while(*index < current_strlen) {
620                 char c = string[*index];
621
622                 switch(c) {
623
624                         case '-':
625                                 on_star = 0;
626                                 if(third_dash)                  fourth_dash = 1;
627                                 else if(in_hint)                third_dash      = 1;
628                                 else if(first_dash)     second_dash = 1;
629                                 else                                            first_dash = 1;
630                                 break;
631
632                         case 'S':
633                                 on_star = 0;
634                                 if(second_dash && !in_hint) {
635                                         (*index)++;
636                                         json_eat_ws(string, index, 1);
637                                         (*index)--; /* this will get incremented at the bottom of the loop */
638                                         in_hint = 1;
639                                         break;
640                                 } 
641
642                                 if(second_dash && in_hint) {
643                                         buffer_add_char(buf, c);
644                                         break;
645                                 }
646
647                         case 'E':
648                                 on_star = 0;
649                                 if(second_dash && !in_hint) {
650                                         (*index)++;
651                                         json_eat_ws(string, index, 1);
652                                         (*index)--; /* this will get incremented at the bottom of the loop */
653                                         in_hint = 1;
654                                         break;
655                                 }
656
657                                 if(second_dash && in_hint) {
658                                         buffer_add_char(buf, c);
659                                         break;
660                                 }
661
662                         case '*':
663                                 on_star = 1;
664                                 break;
665
666                         case '/':
667                                 if(on_star) 
668                                         done = 1;
669                                 else
670                                 on_star = 0;
671                                 break;
672
673                         default:
674                                 on_star = 0;
675                                 if(in_hint)
676                                         buffer_add_char(buf, c);
677                 }
678
679                 (*index)++;
680                 if(done) break;
681         }
682
683         if( buf->n_used > 0 && buffer)
684                 *buffer = buffer_data(buf);
685
686         buffer_free(buf);
687         return 0;
688 }
689
/* Returns 1 if c is one of the decimal digits '0' through '9', else 0. */
int is_number(char c) {
	/* the C standard guarantees the digit characters are contiguous */
	return ( c >= '0' && c <= '9' ) ? 1 : 0;
}
706
/* Reports a parse error on stderr and returns -1.
 *
 * The report shows the character at string[*index], its numeric code, the
 * index, err_msg, and up to 59 characters of context starting 30
 * characters before the index (or at the start of the string when the
 * index is 30 or less).  If string or index is NULL only the message is
 * printed.  *index is not modified.
 */
int json_handle_error(char* string, unsigned long* index, char* err_msg) {

	char* msg = err_msg ? err_msg : "(no message)";

	if( !string || !index ) {
		fprintf(stderr, "\nError parsing json string\nMsg:\t%s\n\n", msg);
		return -1;
	}

	unsigned long start = (*index > 30) ? (*index - 30) : 0;

	char buf[60];
	snprintf(buf, sizeof(buf), "%.59s", string + start);

	fprintf(stderr,
			"\nError parsing json string at character %c "
			"(code %d) and index %lu\nMsg:\t%s\nNear:\t%s\n\n",
			string[*index], string[*index], *index, msg, buf );

	return -1;
}
724
725
726 jsonObject* jsonParseFile( const char* filename ) {
727         return json_parse_file( filename );
728 }
729         
730 jsonObject* json_parse_file(const char* filename) {
731         if(!filename) return NULL;
732         char* data = file_to_string(filename);
733         jsonObject* o = json_parse_string(data);
734         free(data);
735         return o;
736 }
737
738
739
740