]> git.evergreen-ils.org Git - OpenSRF.git/blob - src/objson/json_parser.c
fixed a number of bugs regarding number parsing
[OpenSRF.git] / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
19 /* Length of the JSON string currently being parsed.  json_parse_string()
20  * stores strlen(string) here so the helper parsers in this file can
   * bounds-check *index without recalculating the length in each function.
21  */
22 int current_strlen; /* XXX shared by every parse: need to move this into the function params for thread support */
23
24 object* json_parse_string(char* string) {
25
26         if(string == NULL) return NULL;
27
28         current_strlen = strlen(string);
29
30         if(current_strlen == 0) 
31                 return NULL;
32
33         object* obj = new_object(NULL);
34         unsigned long index = 0;
35
36         int status = _json_parse_string(string, &index, obj);
37         if(!status)
38                 return obj;
39
40         if(status == -2)
41                 return NULL;
42
43         return NULL;
44 }
45
46
47 int _json_parse_string(char* string, unsigned long* index, object* obj) {
48         assert(string && index && *index < current_strlen);
49
50         int status = 0; /* return code from parsing routines */
51         char* classname = NULL; /* object class hint */
52         json_eat_ws(string, index, 1); /* remove leading whitespace */
53
54         char c = string[*index];
55
56         /* remove any leading comments */
57         if( c == '/' ) { 
58
59                 while(1) {
60                         (*index)++; /* move to second comment char */
61                         status = json_eat_comment(string, index, &classname, 1);
62                         if(status) return status;
63
64                         json_eat_ws(string, index, 1);
65                         c = string[*index];
66                         if(c != '/')
67                                 break;
68                 }
69         }
70
71         json_eat_ws(string, index, 1); /* remove leading whitespace */
72
73         if(*index >= current_strlen)
74                 return -2;
75
76         switch(c) {
77                                 
78                 /* json string */
79                 case '"': 
80                         (*index)++;
81                         status = json_parse_json_string(string, index, obj);
82                         break;
83
84                 /* json array */
85                 case '[':
86                         (*index)++;
87                         status = json_parse_json_array(string, index, obj);                     
88                         break;
89
90                 /* json object */
91                 case '{':
92                         (*index)++;
93                         status = json_parse_json_object(string, index, obj);
94                         break;
95
96                 /* NULL */
97                 case 'n':
98                 case 'N':
99                         status = json_parse_json_null(string, index, obj);
100                         break;
101                         
102
103                 /* true, false */
104                 case 'f':
105                 case 'F':
106                 case 't':
107                 case 'T':
108                         status = json_parse_json_bool(string, index, obj);
109                         break;
110
111                 default:
112                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
113                                 status = json_parse_json_number(string, index, obj);    
114                                 if(status) return status;
115                                 break;
116                         }
117
118                         (*index)--;
119                         /* we should never get here */
120                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
121         }       
122
123         if(status) return status;
124
125         json_eat_ws(string, index, 1);
126
127         if( *index < current_strlen ) {
128                 /* remove any trailing comments */
129                 c = string[*index];
130                 if( c == '/' ) { 
131                         (*index)++;
132                         status = json_eat_comment(string, index, NULL, 0);
133                         if(status) return status;
134                 }
135         }
136
137         if(classname){
138                 obj->set_class(obj, classname);
139                 free(classname);
140         }
141
142         return 0;
143 }
144
145
146 int json_parse_json_null(char* string, unsigned long* index, object* obj) {
147
148         if(*index >= (current_strlen - 3)) {
149                 return json_handle_error(string, index, 
150                         "_parse_json_string(): invalid null" );
151         }
152
153         if(!strncasecmp(string + (*index), "null", 4)) {
154                 (*index) += 4;
155                 obj->is_null = 1;
156                 return 0;
157         } else {
158                 return json_handle_error(string, index,
159                         "_parse_json_string(): invalid null" );
160         }
161 }
162
163 /* should be at the first character of the bool at this point */
164 int json_parse_json_bool(char* string, unsigned long* index, object* obj) {
165         assert(string && obj && *index < current_strlen);
166
167         char* ret = "json_parse_json_bool(): truncated bool";
168
169         if( *index >= (current_strlen - 5))
170                 return json_handle_error(string, index, ret);
171         
172         if(!strncasecmp( string + (*index), "false", 5)) {
173                 (*index) += 5;
174                 return 0;
175         }
176
177         if( *index >= (current_strlen - 4))
178                 return json_handle_error(string, index, ret);
179
180         if(!strncasecmp( string + (*index), "true", 4)) {
181                 (*index) += 4;
182                 return 0;
183         }
184
185         return json_handle_error(string, index, ret);
186 }
187
188
189 /* expecting the first character of the number */
190 int json_parse_json_number(char* string, unsigned long* index, object* obj) {
191         assert(string && obj && *index < current_strlen);
192
193         growing_buffer* buf = buffer_init(64);
194         char c = string[*index];
195
196         int done = 0;
197         int dot_seen = 0;
198
199         /* negative number? */
200         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
201
202         while(*index < current_strlen) {
203
204                 if(is_number(c))
205                         buffer_add_char(buf, c);
206
207                 else if( c == '.' ) {
208                         if(dot_seen) {
209                                 return json_handle_error(string, index, 
210                                         "json_parse_json_number(): malformed json number");
211                         }
212                         dot_seen = 1;
213                         buffer_add_char(buf, c);
214                 } else {
215                         done = 1; break;
216                 }
217                 (*index)++;
218                 c = string[*index];
219                 if(done) break;
220         }
221
222         if(dot_seen) {
223                 obj->is_double = 1;
224                 obj->double_value = strtod(buf->buf, NULL);
225                 buffer_free(buf);
226                 return 0;
227
228         } else {
229                 obj->is_number = 1;
230                 obj->num_value = atol(buf->buf);
231                 buffer_free(buf);
232                 return 0;
233         }
234 }
235
236 /* index should point to the character directly following the '['.  when done
237  * index will point to the character directly following the ']' character
238  */
239 int json_parse_json_array(char* string, unsigned long* index, object* obj) {
240         assert(string && obj && index && *index < current_strlen);
241
242         int status;
243         int in_parse = 0; /* true if this array already contains one item */
244         obj->is_array = 1;
245         while(*index < current_strlen) {
246
247                 json_eat_ws(string, index, 1);
248
249                 if(string[*index] == ']') {
250                         (*index)++;
251                         break;
252                 }
253
254                 if(in_parse) {
255                         json_eat_ws(string, index, 1);
256                         if(string[*index] != ',') {
257                                 return json_handle_error(string, index,
258                                         "json_parse_json_array(): array not followed by a ','");
259                         }
260                         (*index)++;
261                         json_eat_ws(string, index, 1);
262                 }
263
264                 object* item = new_object(NULL);
265                 status = _json_parse_string(string, index, item);
266
267                 if(status) return status;
268                 obj->push(obj, item);
269                 in_parse = 1;
270         }
271
272         return 0;
273 }
274
275
276 /* index should point to the character directly following the '{'.  when done
277  * index will point to the character directly following the '}'
278  */
279 int json_parse_json_object(char* string, unsigned long* index, object* obj) {
280         assert(string && obj && index && *index < current_strlen);
281
282         obj->is_hash = 1;
283         int status;
284         int in_parse = 0; /* true if we've already added one item to this object */
285
286         while(*index < current_strlen) {
287
288                 json_eat_ws(string, index, 1);
289
290                 if(string[*index] == '}') {
291                         (*index)++;
292                         break;
293                 }
294
295                 if(in_parse) {
296                         if(string[*index] != ',') {
297                                 return json_handle_error(string, index,
298                                         "json_parse_json_object(): object missing ',' betweenn elements" );
299                         }
300                         (*index)++;
301                         json_eat_ws(string, index, 1);
302                 }
303
304                 /* first we grab the hash key */
305                 object* key_obj = new_object(NULL);
306                 status = _json_parse_string(string, index, key_obj);
307                 if(status) return status;
308
309                 if(!key_obj->is_string) {
310                         return json_handle_error(string, index, 
311                                 "_json_parse_json_object(): hash key not a string");
312                 }
313
314                 char* key = key_obj->string_data;
315
316                 json_eat_ws(string, index, 1);
317
318                 if(string[*index] != ':') {
319                         return json_handle_error(string, index, 
320                                 "json_parse_json_object(): hash key not followed by ':' character");
321                 }
322
323                 (*index)++;
324
325                 /* now grab the value object */
326                 json_eat_ws(string, index, 1);
327                 object* value_obj = new_object(NULL);
328                 status = _json_parse_string(string, index, value_obj);
329                 if(status) return status;
330
331                 /* put the data into the object and continue */
332                 obj->add_key(obj, key, value_obj);
333                 free_object(key_obj);
334                 in_parse = 1;
335         }
336         return 0;
337 }
338
339
340
341 /* when done, index will point to the character after the closing quote */
342 int json_parse_json_string(char* string, unsigned long* index, object* obj) {
343         assert(string && index && *index < current_strlen);
344
345         int in_escape = 0;      
346         int done = 0;
347         growing_buffer* buf = buffer_init(64);
348
349         while(*index < current_strlen) {
350
351                 char c = string[*index]; 
352
353                 switch(c) {
354
355                         case '\\':
356                                 if(in_escape) {
357                                         buffer_add(buf, "\\");
358                                         in_escape = 0;
359                                 } else 
360                                         in_escape = 1;
361                                 break;
362
363                         case '"':
364                                 if(in_escape) {
365                                         buffer_add(buf, "\"");
366                                         in_escape = 0;
367                                 } else 
368                                         done = 1;
369                                 break;
370
371                         case 't':
372                                 if(in_escape) {
373                                         buffer_add(buf,"\t");
374                                         in_escape = 0;
375                                 } else 
376                                         buffer_add_char(buf, c);
377                                 break;
378
379                         case 'b':
380                                 if(in_escape) {
381                                         buffer_add(buf,"\b");
382                                         in_escape = 0;
383                                 } else 
384                                         buffer_add_char(buf, c);
385                                 break;
386
387                         case 'f':
388                                 if(in_escape) {
389                                         buffer_add(buf,"\f");
390                                         in_escape = 0;
391                                 } else 
392                                         buffer_add_char(buf, c);
393                                 break;
394
395                         case 'r':
396                                 if(in_escape) {
397                                         buffer_add(buf,"\r");
398                                         in_escape = 0;
399                                 } else 
400                                         buffer_add_char(buf, c);
401                                 break;
402
403                         case 'n':
404                                 if(in_escape) {
405                                         buffer_add(buf,"\n");
406                                         in_escape = 0;
407                                 } else 
408                                         buffer_add_char(buf, c);
409                                 break;
410
411                         case 'u':
412                                 if(in_escape) {
413                                         (*index)++;
414
415                                         if(*index >= (current_strlen - 4)) {
416                                                 return json_handle_error(string, index,
417                                                         "json_parse_json_string(): truncated escaped unicode"); }
418
419                                         char buff[5];
420                                         memset(buff,0,5);
421                                         memcpy(buff, string + (*index), 4);
422
423
424                                         /* ----------------------------------------------------------------------- */
425                                         /* ----------------------------------------------------------------------- */
426                                         /* The following chunk was borrowed with permission from 
427                                                 json-c http://oss.metaparadigm.com/json-c/ */
428                                         unsigned char utf_out[3];
429                                         memset(utf_out,0,3);
430
431                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
432
433                                         unsigned int ucs_char =
434                                                 (hexdigit(string[*index] ) << 12) +
435                                                 (hexdigit(string[*index + 1]) << 8) +
436                                                 (hexdigit(string[*index + 2]) << 4) +
437                                                 hexdigit(string[*index + 3]);
438         
439                                         if (ucs_char < 0x80) {
440                                                 utf_out[0] = ucs_char;
441                                                 buffer_add(buf, utf_out);
442
443                                         } else if (ucs_char < 0x800) {
444                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
445                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
446                                                 buffer_add(buf, utf_out);
447
448                                         } else {
449                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
450                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
451                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
452                                                 buffer_add(buf, utf_out);
453                                         }
454                                         /* ----------------------------------------------------------------------- */
455                                         /* ----------------------------------------------------------------------- */
456
457                                         (*index) += 3;
458                                         in_escape = 0;
459
460                                 } else {
461
462                                         buffer_add_char(buf, c);
463                                 }
464
465                                 break;
466
467                         default:
468                                 buffer_add_char(buf, c);
469                 }
470
471                 (*index)++;
472                 if(done) break;
473         }
474
475         obj->set_string(obj, buf->buf);
476         buffer_free(buf);
477         return 0;
478 }
479
480
481 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
482         assert(string && index);
483         if(*index >= current_strlen)
484                 return;
485
486         if( eat_all ) { /* removes newlines, etc */
487                 while(string[*index] == ' '     || 
488                                 string[*index] == '\n'  ||
489                                 string[*index] == '\t') 
490                         (*index)++;
491         }
492
493         else    
494                 while(string[*index] == ' ') (*index)++;
495 }
496
497
498 /* index should be at the '*' character at the beginning of the comment.
499  * when done, index will point to the first character after the final /
500  */
501 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
502         assert(string && index && *index < current_strlen);
503
504         if(string[*index] != '*' && string[*index] != '/' )
505                 return json_handle_error(string, index, 
506                         "json_eat_comment(): invalid character after /");
507
508         /* chop out any // style comments */
509         if(string[*index] == '/') {
510                 (*index)++;
511                 char c = string[*index];
512                 while(*index < current_strlen) {
513                         (*index)++;
514                         if(c == '\n') 
515                                 return 0;
516                         c = string[*index];
517                 }
518                 return 0;
519         }
520
521         (*index)++;
522
523         int on_star                     = 0; /* true if we just saw a '*' character */
524
525         /* we're just past the '*' */
526         if(!parse_class) { /* we're not concerned with class hints */
527                 while(*index < current_strlen) {
528                         if(string[*index] == '/') {
529                                 if(on_star) {
530                                         (*index)++;
531                                         return 0;
532                                 }
533                         }
534
535                         if(string[*index] == '*') on_star = 1;
536                         else on_star = 0;
537
538                         (*index)++;
539                 }
540                 return 0;
541         }
542
543
544
545         growing_buffer* buf = buffer_init(64);
546
547         int first_dash          = 0;
548         int second_dash = 0;
549         int third_dash          = 0;
550         int fourth_dash = 0;
551
552         int in_hint                     = 0;
553         int done                                = 0;
554
555         /*--S hint--*/   /* <-- Hints  look like this */
556         /*--E hint--*/
557
558         while(*index < current_strlen) {
559                 char c = string[*index];
560
561                 switch(c) {
562
563                         case '-':
564                                 on_star = 0;
565                                 if(third_dash)                  fourth_dash = 1;
566                                 else if(in_hint)                third_dash      = 1;
567                                 else if(first_dash)     second_dash = 1;
568                                 else                                            first_dash = 1;
569                                 break;
570
571                         case 'S':
572                                 on_star = 0;
573                                 if(second_dash && !in_hint) {
574                                         (*index)++;
575                                         json_eat_ws(string, index, 1);
576                                         (*index)--; /* this will get incremented at the bottom of the loop */
577                                         in_hint = 1;
578                                         break;
579                                 }
580
581                         case 'E':
582                                 on_star = 0;
583                                 if(second_dash && !in_hint) {
584                                         (*index)++;
585                                         json_eat_ws(string, index, 1);
586                                         (*index)--; /* this will get incremented at the bottom of the loop */
587                                         in_hint = 1;
588                                         break;
589                                 }
590
591                         case '*':
592                                 on_star = 1;
593                                 break;
594
595                         case '/':
596                                 if(on_star) 
597                                         done = 1;
598                                 else
599                                 on_star = 0;
600                                 break;
601
602                         default:
603                                 on_star = 0;
604                                 if(in_hint)
605                                         buffer_add_char(buf, c);
606                 }
607
608                 (*index)++;
609                 if(done) break;
610         }
611
612         if( buf->n_used > 0 && buffer)
613                 *buffer = buffer_data(buf);
614
615         buffer_free(buf);
616         return 0;
617 }
618
/*
 * Returns 1 if c is one of the decimal digits '0'..'9', 0 otherwise.
 * The C standard guarantees that the digit characters are contiguous, so a
 * range comparison covers exactly the ten digits.
 */
int is_number(char c) {
	return (c >= '0' && c <= '9') ? 1 : 0;
}
635
/*
 * Reports a parse error on stderr and returns -1.
 *
 * The report shows the character at string[*index], its code, the index,
 * err_msg and up to 59 characters of context: starting 30 characters
 * before the index when the index is greater than 30, otherwise starting
 * at the beginning of the string.  *index is not modified.
 */
int json_handle_error(char* string, unsigned long* index, char* err_msg) {

	char buf[60];
	memset(buf, 0, sizeof(buf));

	unsigned long start = (*index > 30) ? (*index - 30) : 0;

	/* copy at most 59 characters so the zeroed last byte terminates buf */
	strncpy( buf, string + start, sizeof(buf) - 1 );

	/* *index is unsigned long, hence %lu */
	fprintf(stderr,
			"\nError parsing json string at charracter %c "
			"(code %d) and index %lu\nMsg:\t%s\nNear:\t%s\n\n",
			string[*index], string[*index], *index, err_msg, buf );
	return -1;
}
652
653