]> git.evergreen-ils.org Git - OpenSRF.git/blob - src/objson/json_parser.c
new json api changes
[OpenSRF.git] / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
/* Length of the JSON text currently being parsed.  json_parse_string()
 * stores it here once, so the helper routines can bounds-check against it
 * without calling strlen() again.
 */
int current_strlen; /* XXX shared global: not thread safe, should become a function parameter */
23
24
25 jsonObject* jsonParseString( char* string ) {
26         return json_parse_string( string );
27 }
28
29 //jsonObject* (*jsonParseString) (char* str) = &_jsonParseString;
30
31 jsonObject* json_parse_string(char* string) {
32
33         if(string == NULL) return NULL;
34
35         current_strlen = strlen(string);
36
37         if(current_strlen == 0) 
38                 return NULL;
39
40         unsigned long index = 0;
41
42         json_eat_ws(string, &index, 1); /* remove leading whitespace */
43         if(index == current_strlen) return NULL;
44
45         jsonObject* obj = jsonNewObject(NULL);
46
47         int status = _json_parse_string(string, &index, obj);
48         if(!status) return obj;
49
50         if(status == -2) {
51                 jsonObjectFree(obj);
52                 return NULL;
53         }
54
55         return NULL;
56 }
57
58
59 int _json_parse_string(char* string, unsigned long* index, jsonObject* obj) {
60         if( !string || !index || *index >= current_strlen) return -2;
61
62         int status = 0; /* return code from parsing routines */
63         char* classname = NULL; /* object class hint */
64         json_eat_ws(string, index, 1); /* remove leading whitespace */
65
66         char c = string[*index];
67
68         /* remove any leading comments */
69         if( c == '/' ) { 
70
71                 while(1) {
72                         (*index)++; /* move to second comment char */
73                         status = json_eat_comment(string, index, &classname, 1);
74                         if(status) return status;
75
76                         json_eat_ws(string, index, 1);
77                         c = string[*index];
78                         if(c != '/')
79                                 break;
80                 }
81         }
82
83         json_eat_ws(string, index, 1); /* remove leading whitespace */
84
85         if(*index >= current_strlen)
86                 return -2;
87
88         switch(c) {
89                                 
90                 /* json string */
91                 case '"': 
92                         (*index)++;
93                         status = json_parse_json_string(string, index, obj);
94                         break;
95
96                 /* json array */
97                 case '[':
98                         (*index)++;
99                         status = json_parse_json_array(string, index, obj);                     
100                         break;
101
102                 /* json object */
103                 case '{':
104                         (*index)++;
105                         status = json_parse_json_object(string, index, obj);
106                         break;
107
108                 /* NULL */
109                 case 'n':
110                 case 'N':
111                         status = json_parse_json_null(string, index, obj);
112                         break;
113                         
114
115                 /* true, false */
116                 case 'f':
117                 case 'F':
118                 case 't':
119                 case 'T':
120                         status = json_parse_json_bool(string, index, obj);
121                         break;
122
123                 default:
124                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
125                                 status = json_parse_json_number(string, index, obj);    
126                                 if(status) return status;
127                                 break;
128                         }
129
130                         (*index)--;
131                         /* we should never get here */
132                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
133         }       
134
135         if(status) return status;
136
137         json_eat_ws(string, index, 1);
138
139         if( *index < current_strlen ) {
140                 /* remove any trailing comments */
141                 c = string[*index];
142                 if( c == '/' ) { 
143                         (*index)++;
144                         status = json_eat_comment(string, index, NULL, 0);
145                         if(status) return status;
146                 }
147         }
148
149         if(classname){
150                 jsonObjectSetClass(obj, classname);
151                 free(classname);
152         }
153
154         return 0;
155 }
156
157
158 int json_parse_json_null(char* string, unsigned long* index, jsonObject* obj) {
159
160         if(*index >= (current_strlen - 3)) {
161                 return json_handle_error(string, index, 
162                         "_parse_json_string(): invalid null" );
163         }
164
165         if(!strncasecmp(string + (*index), "null", 4)) {
166                 (*index) += 4;
167                 obj->type = JSON_NULL;
168                 return 0;
169         } else {
170                 return json_handle_error(string, index,
171                         "_parse_json_string(): invalid null" );
172         }
173 }
174
175 /* should be at the first character of the bool at this point */
176 int json_parse_json_bool(char* string, unsigned long* index, jsonObject* obj) {
177         if( ! string || ! obj || *index >= current_strlen ) return -1;
178
179         char* ret = "json_parse_json_bool(): truncated bool";
180
181         if( *index >= (current_strlen - 5))
182                 return json_handle_error(string, index, ret);
183         
184         if(!strncasecmp( string + (*index), "false", 5)) {
185                 (*index) += 5;
186                 obj->value.b = 0;
187                 obj->type = JSON_BOOL;
188                 return 0;
189         }
190
191         if( *index >= (current_strlen - 4))
192                 return json_handle_error(string, index, ret);
193
194         if(!strncasecmp( string + (*index), "true", 4)) {
195                 (*index) += 4;
196                 obj->value.b = 1;
197                 obj->type = JSON_BOOL;
198                 return 0;
199         }
200
201         return json_handle_error(string, index, ret);
202 }
203
204
205 /* expecting the first character of the number */
206 int json_parse_json_number(char* string, unsigned long* index, jsonObject* obj) {
207         if( ! string || ! obj || *index >= current_strlen ) return -1;
208
209         growing_buffer* buf = buffer_init(64);
210         char c = string[*index];
211
212         int done = 0;
213         int dot_seen = 0;
214
215         /* negative number? */
216         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
217
218         c = string[*index];
219
220         while(*index < current_strlen) {
221
222                 if(is_number(c)) {
223                         buffer_add_char(buf, c);
224                 }
225
226                 else if( c == '.' ) {
227                         if(dot_seen) {
228                                 buffer_free(buf);
229                                 return json_handle_error(string, index, 
230                                         "json_parse_json_number(): malformed json number");
231                         }
232                         dot_seen = 1;
233                         buffer_add_char(buf, c);
234                 } else {
235                         done = 1; break;
236                 }
237
238                 (*index)++;
239                 c = string[*index];
240                 if(done) break;
241         }
242
243         obj->type = JSON_NUMBER;
244         obj->value.n = strtod(buf->buf, NULL);
245         buffer_free(buf);
246         return 0;
247 }
248
249 /* index should point to the character directly following the '['.  when done
250  * index will point to the character directly following the ']' character
251  */
252 int json_parse_json_array(char* string, unsigned long* index, jsonObject* obj) {
253
254         if( ! string || ! obj || ! index || *index >= current_strlen ) return -1;
255
256         int status = 0;
257         int in_parse = 0; /* true if this array already contains one item */
258         obj->type = JSON_ARRAY;
259         int set = 0;
260         int done = 0;
261
262         while(*index < current_strlen) {
263
264                 json_eat_ws(string, index, 1);
265
266                 if(string[*index] == ']') {
267                         (*index)++;
268                         done = 1;
269                         break;
270                 }
271
272                 if(in_parse) {
273                         json_eat_ws(string, index, 1);
274                         if(string[*index] != ',') {
275                                 return json_handle_error(string, index,
276                                         "json_parse_json_array(): array not followed by a ','");
277                         }
278                         (*index)++;
279                         json_eat_ws(string, index, 1);
280                 }
281
282                 jsonObject* item = jsonNewObject(NULL);
283
284                 #ifndef STRICT_JSON_READ
285                 if(*index < current_strlen) {
286                         if(string[*index] == ',' || string[*index] == ']') {
287                                 status = 0;
288                                 set = 1;
289                         }
290                 }
291                 if(!set) status = _json_parse_string(string, index, item);
292
293                 #else
294                 status = _json_parse_string(string, index, item);
295                 #endif
296
297                 if(status) { jsonObjectFree(item); return status; }
298                 jsonObjectPush(obj, item);
299                 in_parse = 1;
300                 set = 0;
301         }
302
303         if(!done)
304                 return json_handle_error(string, index,
305                         "json_parse_json_array(): array not closed");
306
307         return 0;
308 }
309
310
311 /* index should point to the character directly following the '{'.  when done
312  * index will point to the character directly following the '}'
313  */
314 int json_parse_json_object(char* string, unsigned long* index, jsonObject* obj) {
315         if( ! string || !obj || ! index || *index >= current_strlen ) return -1;
316
317         obj->type = JSON_HASH;
318         int status;
319         int in_parse = 0; /* true if we've already added one item to this object */
320         int set = 0;
321         int done = 0;
322
323         while(*index < current_strlen) {
324
325                 json_eat_ws(string, index, 1);
326
327                 if(string[*index] == '}') {
328                         (*index)++;
329                         done = 1;
330                         break;
331                 }
332
333                 if(in_parse) {
334                         if(string[*index] != ',') {
335                                 return json_handle_error(string, index,
336                                         "json_parse_json_object(): object missing ',' between elements" );
337                         }
338                         (*index)++;
339                         json_eat_ws(string, index, 1);
340                 }
341
342                 /* first we grab the hash key */
343                 jsonObject* key_obj = jsonNewObject(NULL);
344                 status = _json_parse_string(string, index, key_obj);
345                 if(status) return status;
346
347                 if(key_obj->type != JSON_STRING) {
348                         return json_handle_error(string, index, 
349                                 "_json_parse_json_object(): hash key not a string");
350                 }
351
352                 char* key = key_obj->value.s;
353
354                 json_eat_ws(string, index, 1);
355
356                 if(string[*index] != ':') {
357                         return json_handle_error(string, index, 
358                                 "json_parse_json_object(): hash key not followed by ':' character");
359                 }
360
361                 (*index)++;
362
363                 /* now grab the value object */
364                 json_eat_ws(string, index, 1);
365                 jsonObject* value_obj = jsonNewObject(NULL);
366
367 #ifndef STRICT_JSON_READ
368                 if(*index < current_strlen) {
369                         if(string[*index] == ',' || string[*index] == '}') {
370                                 status = 0;
371                                 set = 1;
372                         }
373                 }
374                 if(!set)
375                         status = _json_parse_string(string, index, value_obj);
376
377 #else
378                  status = _json_parse_string(string, index, value_obj);
379 #endif
380
381                 if(status) return status;
382
383                 /* put the data into the object and continue */
384                 jsonObjectSetKey(obj, key, value_obj);
385                 jsonObjectFree(key_obj);
386                 in_parse = 1;
387                 set = 0;
388         }
389
390         if(!done)
391                 return json_handle_error(string, index,
392                         "json_parse_json_object(): object not closed");
393
394         return 0;
395 }
396
397
398
399 /* when done, index will point to the character after the closing quote */
400 int json_parse_json_string(char* string, unsigned long* index, jsonObject* obj) {
401         if( ! string || ! index || *index >= current_strlen ) return -1;
402
403         int in_escape = 0;      
404         int done = 0;
405         growing_buffer* buf = buffer_init(64);
406
407         while(*index < current_strlen) {
408
409                 char c = string[*index]; 
410
411                 switch(c) {
412
413                         case '\\':
414                                 if(in_escape) {
415                                         buffer_add(buf, "\\");
416                                         in_escape = 0;
417                                 } else 
418                                         in_escape = 1;
419                                 break;
420
421                         case '"':
422                                 if(in_escape) {
423                                         buffer_add(buf, "\"");
424                                         in_escape = 0;
425                                 } else 
426                                         done = 1;
427                                 break;
428
429                         case 't':
430                                 if(in_escape) {
431                                         buffer_add(buf,"\t");
432                                         in_escape = 0;
433                                 } else 
434                                         buffer_add_char(buf, c);
435                                 break;
436
437                         case 'b':
438                                 if(in_escape) {
439                                         buffer_add(buf,"\b");
440                                         in_escape = 0;
441                                 } else 
442                                         buffer_add_char(buf, c);
443                                 break;
444
445                         case 'f':
446                                 if(in_escape) {
447                                         buffer_add(buf,"\f");
448                                         in_escape = 0;
449                                 } else 
450                                         buffer_add_char(buf, c);
451                                 break;
452
453                         case 'r':
454                                 if(in_escape) {
455                                         buffer_add(buf,"\r");
456                                         in_escape = 0;
457                                 } else 
458                                         buffer_add_char(buf, c);
459                                 break;
460
461                         case 'n':
462                                 if(in_escape) {
463                                         buffer_add(buf,"\n");
464                                         in_escape = 0;
465                                 } else 
466                                         buffer_add_char(buf, c);
467                                 break;
468
469                         case 'u':
470                                 if(in_escape) {
471                                         (*index)++;
472
473                                         if(*index >= (current_strlen - 4)) {
474                                                 buffer_free(buf);
475                                                 return json_handle_error(string, index,
476                                                         "json_parse_json_string(): truncated escaped unicode"); }
477
478                                         char buff[5];
479                                         memset(buff,0,5);
480                                         memcpy(buff, string + (*index), 4);
481
482
483                                         /* ----------------------------------------------------------------------- */
484                                         /* ----------------------------------------------------------------------- */
485                                         /* The following chunk was borrowed with permission from 
486                                                 json-c http://oss.metaparadigm.com/json-c/ */
487                                         unsigned char utf_out[3];
488                                         memset(utf_out,0,3);
489
490                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
491
492                                         unsigned int ucs_char =
493                                                 (hexdigit(string[*index] ) << 12) +
494                                                 (hexdigit(string[*index + 1]) << 8) +
495                                                 (hexdigit(string[*index + 2]) << 4) +
496                                                 hexdigit(string[*index + 3]);
497         
498                                         if (ucs_char < 0x80) {
499                                                 utf_out[0] = ucs_char;
500                                                 buffer_add(buf, utf_out);
501
502                                         } else if (ucs_char < 0x800) {
503                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
504                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
505                                                 buffer_add(buf, utf_out);
506
507                                         } else {
508                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
509                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
510                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
511                                                 buffer_add(buf, utf_out);
512                                         }
513                                         /* ----------------------------------------------------------------------- */
514                                         /* ----------------------------------------------------------------------- */
515
516                                         (*index) += 3;
517                                         in_escape = 0;
518
519                                 } else {
520
521                                         buffer_add_char(buf, c);
522                                 }
523
524                                 break;
525
526                         default:
527                                 buffer_add_char(buf, c);
528                 }
529
530                 (*index)++;
531                 if(done) break;
532         }
533
534         jsonObjectSetString(obj, buf->buf);
535         buffer_free(buf);
536         return 0;
537 }
538
539
540 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
541         if( ! string || ! index ) return;
542         if(*index >= current_strlen)
543                 return;
544
545         if( eat_all ) { /* removes newlines, etc */
546                 while(string[*index] == ' '     || 
547                                 string[*index] == '\n'  ||
548                                 string[*index] == '\t') 
549                         (*index)++;
550         }
551
552         else    
553                 while(string[*index] == ' ') (*index)++;
554 }
555
556
557 /* index should be at the '*' character at the beginning of the comment.
558  * when done, index will point to the first character after the final /
559  */
560 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
561         if( ! string || ! index || *index >= current_strlen ) return -1;
562         
563
564         if(string[*index] != '*' && string[*index] != '/' )
565                 return json_handle_error(string, index, 
566                         "json_eat_comment(): invalid character after /");
567
568         /* chop out any // style comments */
569         if(string[*index] == '/') {
570                 (*index)++;
571                 char c = string[*index];
572                 while(*index < current_strlen) {
573                         (*index)++;
574                         if(c == '\n') 
575                                 return 0;
576                         c = string[*index];
577                 }
578                 return 0;
579         }
580
581         (*index)++;
582
583         int on_star                     = 0; /* true if we just saw a '*' character */
584
585         /* we're just past the '*' */
586         if(!parse_class) { /* we're not concerned with class hints */
587                 while(*index < current_strlen) {
588                         if(string[*index] == '/') {
589                                 if(on_star) {
590                                         (*index)++;
591                                         return 0;
592                                 }
593                         }
594
595                         if(string[*index] == '*') on_star = 1;
596                         else on_star = 0;
597
598                         (*index)++;
599                 }
600                 return 0;
601         }
602
603
604
605         growing_buffer* buf = buffer_init(64);
606
607         int first_dash          = 0;
608         int second_dash = 0;
609         int third_dash          = 0;
610         int fourth_dash = 0;
611
612         int in_hint                     = 0;
613         int done                                = 0;
614
615         /*--S hint--*/   /* <-- Hints  look like this */
616         /*--E hint--*/
617
618         while(*index < current_strlen) {
619                 char c = string[*index];
620
621                 switch(c) {
622
623                         case '-':
624                                 on_star = 0;
625                                 if(third_dash)                  fourth_dash = 1;
626                                 else if(in_hint)                third_dash      = 1;
627                                 else if(first_dash)     second_dash = 1;
628                                 else                                            first_dash = 1;
629                                 break;
630
631                         case 'S':
632                                 on_star = 0;
633                                 if(second_dash && !in_hint) {
634                                         (*index)++;
635                                         json_eat_ws(string, index, 1);
636                                         (*index)--; /* this will get incremented at the bottom of the loop */
637                                         in_hint = 1;
638                                         break;
639                                 } 
640
641                                 if(second_dash && in_hint) {
642                                         buffer_add_char(buf, c);
643                                         break;
644                                 }
645
646                         case 'E':
647                                 on_star = 0;
648                                 if(second_dash && !in_hint) {
649                                         (*index)++;
650                                         json_eat_ws(string, index, 1);
651                                         (*index)--; /* this will get incremented at the bottom of the loop */
652                                         in_hint = 1;
653                                         break;
654                                 }
655
656                                 if(second_dash && in_hint) {
657                                         buffer_add_char(buf, c);
658                                         break;
659                                 }
660
661                         case '*':
662                                 on_star = 1;
663                                 break;
664
665                         case '/':
666                                 if(on_star) 
667                                         done = 1;
668                                 else
669                                 on_star = 0;
670                                 break;
671
672                         default:
673                                 on_star = 0;
674                                 if(in_hint)
675                                         buffer_add_char(buf, c);
676                 }
677
678                 (*index)++;
679                 if(done) break;
680         }
681
682         if( buf->n_used > 0 && buffer)
683                 *buffer = buffer_data(buf);
684
685         buffer_free(buf);
686         return 0;
687 }
688
/* Returns 1 when c is one of the decimal digits '0' through '9', 0 otherwise. */
int is_number(char c) {
        /* C guarantees that the digit characters are contiguous and ascending */
        if( c >= '0' && c <= '9' )
                return 1;
        return 0;
}
705
/* Reports a parse error on stderr and returns -1.
 *
 * The report shows the character at *index (as a character and as a code),
 * the index itself, err_msg, and up to 59 characters of context that start
 * 30 characters before *index when the index is larger than 30, otherwise
 * at the start of string.
 *
 * An index beyond the end of string is reported as-is, but is not used to
 * read from string: the character is shown as NUL and the context starts at
 * the beginning.  NULL string or index yields -1 without any output.
 */
int json_handle_error(char* string, unsigned long* index, char* err_msg) {
        if( !string || !index ) return -1;

        size_t len = strlen(string);
        unsigned long pos = *index;
        int in_range = ( pos <= len ); /* pos == len is the terminating NUL */

        char at = in_range ? string[pos] : '\0';
        const char* context = ( in_range && pos > 30 ) ? string + (pos - 30) : string;

        char buf[60];
        memset(buf, 0, sizeof(buf));
        strncpy( buf, context, sizeof(buf) - 1 ); /* last byte stays NUL */

        fprintf(stderr,
                        "\nError parsing json string at charracter %c "
                        "(code %d) and index %lu\nMsg:\t%s\nNear:\t%s\n\n",
                        at, at, pos, err_msg ? err_msg : "", buf );
        return -1;
}
722
723
724 jsonObject* jsonParseFile( const char* filename ) {
725         return json_parse_file( filename );
726 }
727         
728 jsonObject* json_parse_file(const char* filename) {
729         if(!filename) return NULL;
730         char* data = file_to_string(filename);
731         jsonObject* o = json_parse_string(data);
732         free(data);
733         return o;
734 }
735
736
737
738