]> git.evergreen-ils.org Git - Evergreen.git/blob - OpenSRF/src/objson/json_parser.c
a3e46923743613aa6ba7033d9afbf77dd79fc9f1
[Evergreen.git] / OpenSRF / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
/* Length of the JSON string currently being parsed.  json_parse_string()
 * stores strlen() of its input here so the parsing routines can bounds-check
 * their index without recalculating the length in each function.
 */
int current_strlen; /* XXX need to move this into the function params for thread support */
23
24 object* json_parse_string(char* string) {
25
26         if(string == NULL) return NULL;
27
28         current_strlen = strlen(string);
29
30         if(current_strlen == 0) 
31                 return NULL;
32
33         object* obj = new_object(NULL);
34         unsigned long index = 0;
35
36         json_eat_ws(string, &index, 1); /* remove leading whitespace */
37         if(index == current_strlen) return NULL;
38
39         int status = _json_parse_string(string, &index, obj);
40         if(!status) return obj;
41
42         if(status == -2)
43                 return NULL;
44
45         return NULL;
46 }
47
48
49 int _json_parse_string(char* string, unsigned long* index, object* obj) {
50         assert(string && index && *index < current_strlen);
51
52         int status = 0; /* return code from parsing routines */
53         char* classname = NULL; /* object class hint */
54         json_eat_ws(string, index, 1); /* remove leading whitespace */
55
56         char c = string[*index];
57
58         /* remove any leading comments */
59         if( c == '/' ) { 
60
61                 while(1) {
62                         (*index)++; /* move to second comment char */
63                         status = json_eat_comment(string, index, &classname, 1);
64                         if(status) return status;
65
66                         json_eat_ws(string, index, 1);
67                         c = string[*index];
68                         if(c != '/')
69                                 break;
70                 }
71         }
72
73         json_eat_ws(string, index, 1); /* remove leading whitespace */
74
75         if(*index >= current_strlen)
76                 return -2;
77
78         switch(c) {
79                                 
80                 /* json string */
81                 case '"': 
82                         (*index)++;
83                         status = json_parse_json_string(string, index, obj);
84                         break;
85
86                 /* json array */
87                 case '[':
88                         (*index)++;
89                         status = json_parse_json_array(string, index, obj);                     
90                         break;
91
92                 /* json object */
93                 case '{':
94                         (*index)++;
95                         status = json_parse_json_object(string, index, obj);
96                         break;
97
98                 /* NULL */
99                 case 'n':
100                 case 'N':
101                         status = json_parse_json_null(string, index, obj);
102                         break;
103                         
104
105                 /* true, false */
106                 case 'f':
107                 case 'F':
108                 case 't':
109                 case 'T':
110                         status = json_parse_json_bool(string, index, obj);
111                         break;
112
113                 default:
114                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
115                                 status = json_parse_json_number(string, index, obj);    
116                                 if(status) return status;
117                                 break;
118                         }
119
120                         (*index)--;
121                         /* we should never get here */
122                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
123         }       
124
125         if(status) return status;
126
127         json_eat_ws(string, index, 1);
128
129         if( *index < current_strlen ) {
130                 /* remove any trailing comments */
131                 c = string[*index];
132                 if( c == '/' ) { 
133                         (*index)++;
134                         status = json_eat_comment(string, index, NULL, 0);
135                         if(status) return status;
136                 }
137         }
138
139         if(classname){
140                 obj->set_class(obj, classname);
141                 free(classname);
142         }
143
144         return 0;
145 }
146
147
148 int json_parse_json_null(char* string, unsigned long* index, object* obj) {
149
150         if(*index >= (current_strlen - 3)) {
151                 return json_handle_error(string, index, 
152                         "_parse_json_string(): invalid null" );
153         }
154
155         if(!strncasecmp(string + (*index), "null", 4)) {
156                 (*index) += 4;
157                 obj->is_null = 1;
158                 return 0;
159         } else {
160                 return json_handle_error(string, index,
161                         "_parse_json_string(): invalid null" );
162         }
163 }
164
165 /* should be at the first character of the bool at this point */
166 int json_parse_json_bool(char* string, unsigned long* index, object* obj) {
167         assert(string && obj && *index < current_strlen);
168
169         char* ret = "json_parse_json_bool(): truncated bool";
170
171         if( *index >= (current_strlen - 5))
172                 return json_handle_error(string, index, ret);
173         
174         if(!strncasecmp( string + (*index), "false", 5)) {
175                 (*index) += 5;
176                 obj->bool_value = 0;
177                 obj->is_bool = 1;
178                 obj->is_null = 0;
179                 return 0;
180         }
181
182         if( *index >= (current_strlen - 4))
183                 return json_handle_error(string, index, ret);
184
185         if(!strncasecmp( string + (*index), "true", 4)) {
186                 (*index) += 4;
187                 obj->bool_value = 1;
188                 obj->is_bool = 1;
189                 obj->is_null = 0;
190                 return 0;
191         }
192
193         return json_handle_error(string, index, ret);
194 }
195
196
197 /* expecting the first character of the number */
198 int json_parse_json_number(char* string, unsigned long* index, object* obj) {
199         assert(string && obj && *index < current_strlen);
200
201         growing_buffer* buf = buffer_init(64);
202         char c = string[*index];
203
204         int done = 0;
205         int dot_seen = 0;
206
207         /* negative number? */
208         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
209
210         c = string[*index];
211
212         while(*index < current_strlen) {
213
214                 if(is_number(c)) {
215                         buffer_add_char(buf, c);
216                 }
217
218                 else if( c == '.' ) {
219                         if(dot_seen) {
220                                 return json_handle_error(string, index, 
221                                         "json_parse_json_number(): malformed json number");
222                         }
223                         dot_seen = 1;
224                         buffer_add_char(buf, c);
225                 } else {
226                         done = 1; break;
227                 }
228
229                 (*index)++;
230                 c = string[*index];
231                 if(done) break;
232         }
233
234         if(dot_seen) {
235                 obj->is_double = 1;
236                 obj->is_null = 0;
237                 obj->double_value = strtod(buf->buf, NULL);
238                 buffer_free(buf);
239                 return 0;
240
241         } else {
242                 obj->is_number = 1;
243                 obj->is_null = 0;
244                 obj->num_value = atol(buf->buf);
245                 buffer_free(buf);
246                 return 0;
247         }
248 }
249
250 /* index should point to the character directly following the '['.  when done
251  * index will point to the character directly following the ']' character
252  */
253 int json_parse_json_array(char* string, unsigned long* index, object* obj) {
254         assert(string && obj && index && *index < current_strlen);
255
256         int status = 0;
257         int in_parse = 0; /* true if this array already contains one item */
258         obj->is_array = 1;
259         obj->is_null = 0;
260         int set = 0;
261         int done = 0;
262
263         while(*index < current_strlen) {
264
265                 json_eat_ws(string, index, 1);
266
267                 if(string[*index] == ']') {
268                         (*index)++;
269                         done = 1;
270                         break;
271                 }
272
273                 if(in_parse) {
274                         json_eat_ws(string, index, 1);
275                         if(string[*index] != ',') {
276                                 return json_handle_error(string, index,
277                                         "json_parse_json_array(): array not followed by a ','");
278                         }
279                         (*index)++;
280                         json_eat_ws(string, index, 1);
281                 }
282
283                 object* item = new_object(NULL);
284
285 #ifndef STRICT_JSON_READ
286                 if(*index < current_strlen) {
287                         if(string[*index] == ',' || string[*index] == ']') {
288                                 status = 0;
289                                 set = 1;
290                         }
291                 }
292                 if(!set)
293                         status = _json_parse_string(string, index, item);
294
295 #else
296                  status = _json_parse_string(string, index, item);
297 #endif
298
299                 if(status) return status;
300                 obj->push(obj, item);
301                 in_parse = 1;
302                 set = 0;
303         }
304
305         if(!done)
306                 return json_handle_error(string, index,
307                         "json_parse_json_array(): array not closed");
308
309         return 0;
310 }
311
312
313 /* index should point to the character directly following the '{'.  when done
314  * index will point to the character directly following the '}'
315  */
316 int json_parse_json_object(char* string, unsigned long* index, object* obj) {
317         assert(string && obj && index && *index < current_strlen);
318
319         obj->is_hash = 1;
320         obj->is_null = 0;
321         int status;
322         int in_parse = 0; /* true if we've already added one item to this object */
323         int set = 0;
324         int done = 0;
325
326         while(*index < current_strlen) {
327
328                 json_eat_ws(string, index, 1);
329
330                 if(string[*index] == '}') {
331                         (*index)++;
332                         done = 1;
333                         break;
334                 }
335
336                 if(in_parse) {
337                         if(string[*index] != ',') {
338                                 return json_handle_error(string, index,
339                                         "json_parse_json_object(): object missing ',' between elements" );
340                         }
341                         (*index)++;
342                         json_eat_ws(string, index, 1);
343                 }
344
345                 /* first we grab the hash key */
346                 object* key_obj = new_object(NULL);
347                 status = _json_parse_string(string, index, key_obj);
348                 if(status) return status;
349
350                 if(!key_obj->is_string) {
351                         return json_handle_error(string, index, 
352                                 "_json_parse_json_object(): hash key not a string");
353                 }
354
355                 char* key = key_obj->string_data;
356
357                 json_eat_ws(string, index, 1);
358
359                 if(string[*index] != ':') {
360                         return json_handle_error(string, index, 
361                                 "json_parse_json_object(): hash key not followed by ':' character");
362                 }
363
364                 (*index)++;
365
366                 /* now grab the value object */
367                 json_eat_ws(string, index, 1);
368                 object* value_obj = new_object(NULL);
369
370 #ifndef STRICT_JSON_READ
371                 if(*index < current_strlen) {
372                         if(string[*index] == ',' || string[*index] == '}') {
373                                 status = 0;
374                                 set = 1;
375                         }
376                 }
377                 if(!set)
378                         status = _json_parse_string(string, index, value_obj);
379
380 #else
381                  status = _json_parse_string(string, index, value_obj);
382 #endif
383
384                 if(status) return status;
385
386                 /* put the data into the object and continue */
387                 obj->add_key(obj, key, value_obj);
388                 free_object(key_obj);
389                 in_parse = 1;
390                 set = 0;
391         }
392
393         if(!done)
394                 return json_handle_error(string, index,
395                         "json_parse_json_object(): object not closed");
396
397         return 0;
398 }
399
400
401
402 /* when done, index will point to the character after the closing quote */
403 int json_parse_json_string(char* string, unsigned long* index, object* obj) {
404         assert(string && index && *index < current_strlen);
405
406         int in_escape = 0;      
407         int done = 0;
408         growing_buffer* buf = buffer_init(64);
409
410         while(*index < current_strlen) {
411
412                 char c = string[*index]; 
413
414                 switch(c) {
415
416                         case '\\':
417                                 if(in_escape) {
418                                         buffer_add(buf, "\\");
419                                         in_escape = 0;
420                                 } else 
421                                         in_escape = 1;
422                                 break;
423
424                         case '"':
425                                 if(in_escape) {
426                                         buffer_add(buf, "\"");
427                                         in_escape = 0;
428                                 } else 
429                                         done = 1;
430                                 break;
431
432                         case 't':
433                                 if(in_escape) {
434                                         buffer_add(buf,"\t");
435                                         in_escape = 0;
436                                 } else 
437                                         buffer_add_char(buf, c);
438                                 break;
439
440                         case 'b':
441                                 if(in_escape) {
442                                         buffer_add(buf,"\b");
443                                         in_escape = 0;
444                                 } else 
445                                         buffer_add_char(buf, c);
446                                 break;
447
448                         case 'f':
449                                 if(in_escape) {
450                                         buffer_add(buf,"\f");
451                                         in_escape = 0;
452                                 } else 
453                                         buffer_add_char(buf, c);
454                                 break;
455
456                         case 'r':
457                                 if(in_escape) {
458                                         buffer_add(buf,"\r");
459                                         in_escape = 0;
460                                 } else 
461                                         buffer_add_char(buf, c);
462                                 break;
463
464                         case 'n':
465                                 if(in_escape) {
466                                         buffer_add(buf,"\n");
467                                         in_escape = 0;
468                                 } else 
469                                         buffer_add_char(buf, c);
470                                 break;
471
472                         case 'u':
473                                 if(in_escape) {
474                                         (*index)++;
475
476                                         if(*index >= (current_strlen - 4)) {
477                                                 return json_handle_error(string, index,
478                                                         "json_parse_json_string(): truncated escaped unicode"); }
479
480                                         char buff[5];
481                                         memset(buff,0,5);
482                                         memcpy(buff, string + (*index), 4);
483
484
485                                         /* ----------------------------------------------------------------------- */
486                                         /* ----------------------------------------------------------------------- */
487                                         /* The following chunk was borrowed with permission from 
488                                                 json-c http://oss.metaparadigm.com/json-c/ */
489                                         unsigned char utf_out[3];
490                                         memset(utf_out,0,3);
491
492                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
493
494                                         unsigned int ucs_char =
495                                                 (hexdigit(string[*index] ) << 12) +
496                                                 (hexdigit(string[*index + 1]) << 8) +
497                                                 (hexdigit(string[*index + 2]) << 4) +
498                                                 hexdigit(string[*index + 3]);
499         
500                                         if (ucs_char < 0x80) {
501                                                 utf_out[0] = ucs_char;
502                                                 buffer_add(buf, utf_out);
503
504                                         } else if (ucs_char < 0x800) {
505                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
506                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
507                                                 buffer_add(buf, utf_out);
508
509                                         } else {
510                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
511                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
512                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
513                                                 buffer_add(buf, utf_out);
514                                         }
515                                         /* ----------------------------------------------------------------------- */
516                                         /* ----------------------------------------------------------------------- */
517
518                                         (*index) += 3;
519                                         in_escape = 0;
520
521                                 } else {
522
523                                         buffer_add_char(buf, c);
524                                 }
525
526                                 break;
527
528                         default:
529                                 buffer_add_char(buf, c);
530                 }
531
532                 (*index)++;
533                 if(done) break;
534         }
535
536         obj->set_string(obj, buf->buf);
537         buffer_free(buf);
538         return 0;
539 }
540
541
542 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
543         assert(string && index);
544         if(*index >= current_strlen)
545                 return;
546
547         if( eat_all ) { /* removes newlines, etc */
548                 while(string[*index] == ' '     || 
549                                 string[*index] == '\n'  ||
550                                 string[*index] == '\t') 
551                         (*index)++;
552         }
553
554         else    
555                 while(string[*index] == ' ') (*index)++;
556 }
557
558
559 /* index should be at the '*' character at the beginning of the comment.
560  * when done, index will point to the first character after the final /
561  */
562 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
563         assert(string && index && *index < current_strlen);
564
565         if(string[*index] != '*' && string[*index] != '/' )
566                 return json_handle_error(string, index, 
567                         "json_eat_comment(): invalid character after /");
568
569         /* chop out any // style comments */
570         if(string[*index] == '/') {
571                 (*index)++;
572                 char c = string[*index];
573                 while(*index < current_strlen) {
574                         (*index)++;
575                         if(c == '\n') 
576                                 return 0;
577                         c = string[*index];
578                 }
579                 return 0;
580         }
581
582         (*index)++;
583
584         int on_star                     = 0; /* true if we just saw a '*' character */
585
586         /* we're just past the '*' */
587         if(!parse_class) { /* we're not concerned with class hints */
588                 while(*index < current_strlen) {
589                         if(string[*index] == '/') {
590                                 if(on_star) {
591                                         (*index)++;
592                                         return 0;
593                                 }
594                         }
595
596                         if(string[*index] == '*') on_star = 1;
597                         else on_star = 0;
598
599                         (*index)++;
600                 }
601                 return 0;
602         }
603
604
605
606         growing_buffer* buf = buffer_init(64);
607
608         int first_dash          = 0;
609         int second_dash = 0;
610         int third_dash          = 0;
611         int fourth_dash = 0;
612
613         int in_hint                     = 0;
614         int done                                = 0;
615
616         /*--S hint--*/   /* <-- Hints  look like this */
617         /*--E hint--*/
618
619         while(*index < current_strlen) {
620                 char c = string[*index];
621
622                 switch(c) {
623
624                         case '-':
625                                 on_star = 0;
626                                 if(third_dash)                  fourth_dash = 1;
627                                 else if(in_hint)                third_dash      = 1;
628                                 else if(first_dash)     second_dash = 1;
629                                 else                                            first_dash = 1;
630                                 break;
631
632                         case 'S':
633                                 on_star = 0;
634                                 if(second_dash && !in_hint) {
635                                         (*index)++;
636                                         json_eat_ws(string, index, 1);
637                                         (*index)--; /* this will get incremented at the bottom of the loop */
638                                         in_hint = 1;
639                                         break;
640                                 } 
641
642                                 if(second_dash && in_hint) {
643                                         buffer_add_char(buf, c);
644                                         break;
645                                 }
646
647                         case 'E':
648                                 on_star = 0;
649                                 if(second_dash && !in_hint) {
650                                         (*index)++;
651                                         json_eat_ws(string, index, 1);
652                                         (*index)--; /* this will get incremented at the bottom of the loop */
653                                         in_hint = 1;
654                                         break;
655                                 }
656
657                                 if(second_dash && in_hint) {
658                                         buffer_add_char(buf, c);
659                                         break;
660                                 }
661
662                         case '*':
663                                 on_star = 1;
664                                 break;
665
666                         case '/':
667                                 if(on_star) 
668                                         done = 1;
669                                 else
670                                 on_star = 0;
671                                 break;
672
673                         default:
674                                 on_star = 0;
675                                 if(in_hint)
676                                         buffer_add_char(buf, c);
677                 }
678
679                 (*index)++;
680                 if(done) break;
681         }
682
683         if( buf->n_used > 0 && buffer)
684                 *buffer = buffer_data(buf);
685
686         buffer_free(buf);
687         return 0;
688 }
689
/* Returns 1 when c is one of the decimal digits '0' through '9',
 * otherwise 0.
 */
int is_number(char c) {
	/* the decimal digit characters are consecutive, so a range test suffices */
	if(c < '0' || c > '9')
		return 0;
	return 1;
}
706
/* Reports a parse error on stderr and returns the generic error code.
 *
 * string:  the JSON text being parsed
 * index:   position at which the error was detected
 * err_msg: description of the problem
 *
 * The report shows the offending character, its numeric code, the index,
 * the message, and up to 59 characters of context beginning 30 characters
 * before the error position.  An index beyond the end of the string is
 * clamped to the terminating NUL before any character is read, so a bad
 * index can never cause an out-of-bounds access here.
 *
 * Always returns -1.
 */
int json_handle_error(char* string, unsigned long* index, char* err_msg) {

	unsigned long len = strlen(string);
	unsigned long pos = (*index > len) ? len : *index;
	unsigned long start = (pos > 30) ? pos - 30 : 0;

	fprintf(stderr,
			"\nError parsing json string at character %c "
			"(code %d) and index %lu\nMsg:\t%s\nNear:\t%.59s\n\n",
			string[pos], string[pos], *index, err_msg, string + start );
	return -1;
}
723
724