]> git.evergreen-ils.org Git - OpenSRF.git/blob - src/objson/json_parser.c
adding early custom json code
[OpenSRF.git] / src / objson / json_parser.c
1 #include "json_parser.h"
2
/* Length of the JSON string currently being parsed.  json_parse_string()
 * stores strlen() of its input here so the helper routines can bounds-check
 * their index without recomputing the length on every call.  Because this is
 * a single file-scope variable, it describes only the most recent string
 * handed to json_parse_string().
 */
int current_strlen; /* XXX need to move this into the function params for thread support */
7
8 object* json_parse_string(char* string) {
9
10         if(string == NULL) {
11                 return NULL;
12         }
13
14         current_strlen = strlen(string);
15
16         if(current_strlen == 0) {
17                 return NULL;
18         }
19
20         object* obj = new_object(NULL);
21         unsigned long index = 0;
22
23         int status = _json_parse_string(string, &index, obj);
24         if(!status)
25                 return obj;
26
27         if(status == -2)
28                 return NULL;
29
30         return NULL;
31 }
32
33
34 int _json_parse_string(char* string, unsigned long* index, object* obj) {
35         assert(string && index && *index < current_strlen);
36
37         int status = 0; /* return code from parsing routines */
38         char* classname = NULL; /* object class hint */
39         json_eat_ws(string, index, 1); /* remove leading whitespace */
40
41         char c = string[*index];
42
43         /* remove any leading comments */
44         if( c == '/' ) { 
45
46                 while(1) {
47                         (*index)++; /* move to second comment char */
48                         status = json_eat_comment(string, index, &classname, 1);
49                         if(status) return status;
50
51                         json_eat_ws(string, index, 1);
52                         c = string[*index];
53                         if(c != '/')
54                                 break;
55                 }
56         }
57
58         json_eat_ws(string, index, 1); /* remove leading whitespace */
59
60         if(*index >= current_strlen)
61                 return -2;
62
63         switch(c) {
64                                 
65                 /* json string */
66                 case '"': 
67                         (*index)++;
68                         status = json_parse_json_string(string, index, obj);
69                         break;
70
71                 /* json array */
72                 case '[':
73                         (*index)++;
74                         status = json_parse_json_array(string, index, obj);                     
75                         break;
76
77                 case '{':
78                         (*index)++;
79                         status = json_parse_json_object(string, index, obj);
80                         break;
81
82                 case 'n':
83                 case 'N':
84                         status = json_parse_json_null(string, index, obj);
85                         break;
86                         
87
88                 case 'f':
89                 case 'F':
90                 case 't':
91                 case 'T':
92                         status = json_parse_json_bool(string, index, obj);
93                         break;
94
95                 /* we should never get here */
96                 default:
97                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
98                                 status = json_parse_json_number(string, index, obj);    
99                                 if(status) return status;
100                                 break;
101                         }
102
103                         (*index)--;
104                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
105         }       
106
107         if(status) return status;
108
109         json_eat_ws(string, index, 1);
110
111         if( *index < current_strlen ) {
112                 /* remove any trailing comments */
113                 c = string[*index];
114                 if( c == '/' ) { 
115                         (*index)++;
116                         status = json_eat_comment(string, index, NULL, 0);
117                         if(status) return status;
118                 }
119         }
120
121         if(classname){
122                 obj->set_class(obj, classname);
123                 free(classname);
124         }
125
126         return 0;
127 }
128
129
130 int json_parse_json_null(char* string, unsigned long* index, object* obj) {
131
132         if(*index >= (current_strlen - 3)) {
133                 return json_handle_error(string, index, 
134                         "_parse_json_string(): invalid null" );
135         }
136
137         if(!strncasecmp(string + (*index), "null", 4)) {
138                 (*index) += 4;
139                 obj->is_null = 1;
140                 return 0;
141         } else {
142                 return json_handle_error(string, index,
143                         "_parse_json_string(): invalid null" );
144         }
145 }
146
147 /* should be at the first character of the bool at this point */
148 int json_parse_json_bool(char* string, unsigned long* index, object* obj) {
149         assert(string && obj && *index < current_strlen);
150
151         char* ret = "json_parse_json_bool(): truncated bool";
152
153         if( *index >= (current_strlen - 5))
154                 return json_handle_error(string, index, ret);
155         
156         if(!strncasecmp( string + (*index), "false", 5)) {
157                 (*index) += 5;
158                 return 0;
159         }
160
161         if( *index >= (current_strlen - 4))
162                 return json_handle_error(string, index, ret);
163
164         if(!strncasecmp( string + (*index), "true", 4)) {
165                 (*index) += 4;
166                 return 0;
167         }
168
169         return json_handle_error(string, index, ret);
170 }
171
172
173 /* expecting the first character of the number */
174 int json_parse_json_number(char* string, unsigned long* index, object* obj) {
175         assert(string && obj && *index < current_strlen);
176
177         growing_buffer* buf = buffer_init(64);
178         char c = string[*index];
179
180         int done = 0;
181         int dot_seen = 0;
182
183         /* negative number? */
184         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
185
186         while(*index < current_strlen) {
187
188                 if(is_number(c))
189                         buffer_add_char(buf, c);
190
191                 else if( c == '.' ) {
192                         if(dot_seen) {
193                                 return json_handle_error(string, index, 
194                                         "json_parse_json_number(): malformed json number");
195                         }
196                         dot_seen = 1;
197                 } else {
198                         done = 1; break;
199                 }
200                 (*index)++;
201                 c = string[*index];
202                 if(done) break;
203         }
204
205         if(dot_seen) {
206                 obj->is_double = 1;
207                 obj->double_value = strtod(buf->buf, NULL);
208                 buffer_free(buf);
209                 return 0;
210
211         } else {
212                 obj->is_number = 1;
213                 obj->num_value = atol(buf->buf);
214                 buffer_free(buf);
215                 return 0;
216         }
217 }
218
219 /* index should point to the character directly following the '['.  when done
220  * index will point to the character directly following the ']' character
221  */
222 int json_parse_json_array(char* string, unsigned long* index, object* obj) {
223         assert(string && obj && index && *index < current_strlen);
224
225         int status;
226         int in_parse = 0; /* true if this array already contains one item */
227         obj->is_array = 1;
228         while(*index < current_strlen) {
229
230                 json_eat_ws(string, index, 1);
231
232                 if(string[*index] == ']') {
233                         (*index)++;
234                         break;
235                 }
236
237                 if(in_parse) {
238                         json_eat_ws(string, index, 1);
239                         if(string[*index] != ',') {
240                                 return json_handle_error(string, index,
241                                         "json_parse_json_array(): array not followed by a ','");
242                         }
243                         (*index)++;
244                         json_eat_ws(string, index, 1);
245                 }
246
247                 object* item = new_object(NULL);
248                 status = _json_parse_string(string, index, item);
249
250                 if(status) return status;
251                 obj->push(obj, item);
252                 in_parse = 1;
253         }
254
255         return 0;
256 }
257
258
259 /* index should point to the character directly following the '{'.  when done
260  * index will point to the character directly following the '}'
261  */
262 int json_parse_json_object(char* string, unsigned long* index, object* obj) {
263         assert(string && obj && index && *index < current_strlen);
264
265         obj->is_hash = 1;
266         int status;
267         int in_parse = 0; /* true if we've already added one item to this object */
268
269         while(*index < current_strlen) {
270
271                 json_eat_ws(string, index, 1);
272
273                 if(string[*index] == '}') {
274                         (*index)++;
275                         break;
276                 }
277
278                 if(in_parse) {
279                         if(string[*index] != ',') {
280                                 return json_handle_error(string, index,
281                                         "json_parse_json_object(): object missing ',' betweenn elements" );
282                         }
283                         (*index)++;
284                         json_eat_ws(string, index, 1);
285                 }
286
287                 /* first we grab the hash key */
288                 object* key_obj = new_object(NULL);
289                 status = _json_parse_string(string, index, key_obj);
290                 if(status) return status;
291
292                 if(!key_obj->is_string) {
293                         return json_handle_error(string, index, 
294                                 "_json_parse_json_object(): hash key not a string");
295                 }
296
297                 char* key = key_obj->string_data;
298
299                 json_eat_ws(string, index, 1);
300
301                 if(string[*index] != ':') {
302                         return json_handle_error(string, index, 
303                                 "json_parse_json_object(): hash key not followed by ':' character");
304                 }
305
306                 (*index)++;
307
308                 /* now grab the value object */
309                 json_eat_ws(string, index, 1);
310                 object* value_obj = new_object(NULL);
311                 status = _json_parse_string(string, index, value_obj);
312                 if(status) return status;
313
314                 /* put the data into the object and continue */
315                 obj->add_key(obj, key, value_obj);
316                 free_object(key_obj);
317                 in_parse = 1;
318         }
319         return 0;
320 }
321
322
323
324 /* when done, index will point to the character after the closing quote */
325 int json_parse_json_string(char* string, unsigned long* index, object* obj) {
326         assert(string && index && *index < current_strlen);
327
328         int in_escape = 0;      
329         int done = 0;
330         growing_buffer* buf = buffer_init(64);
331
332         while(*index < current_strlen) {
333
334                 char c = string[*index]; 
335
336                 switch(c) {
337
338                         case '\\':
339                                 if(in_escape) {
340                                         buffer_add(buf, "\\");
341                                         in_escape = 0;
342                                 } else 
343                                         in_escape = 1;
344                                 break;
345
346                         case '"':
347                                 if(in_escape) {
348                                         buffer_add(buf, "\"");
349                                         in_escape = 0;
350                                 } else 
351                                         done = 1;
352                                 break;
353
354                         case 't':
355                                 if(in_escape) {
356                                         buffer_add(buf,"\t");
357                                         in_escape = 0;
358                                 } else 
359                                         buffer_add_char(buf, c);
360                                 break;
361
362                         case 'b':
363                                 if(in_escape) {
364                                         buffer_add(buf,"\b");
365                                         in_escape = 0;
366                                 } else 
367                                         buffer_add_char(buf, c);
368                                 break;
369
370                         case 'f':
371                                 if(in_escape) {
372                                         buffer_add(buf,"\f");
373                                         in_escape = 0;
374                                 } else 
375                                         buffer_add_char(buf, c);
376                                 break;
377
378                         case 'r':
379                                 if(in_escape) {
380                                         buffer_add(buf,"\r");
381                                         in_escape = 0;
382                                 } else 
383                                         buffer_add_char(buf, c);
384                                 break;
385
386                         case 'n':
387                                 if(in_escape) {
388                                         buffer_add(buf,"\n");
389                                         in_escape = 0;
390                                 } else 
391                                         buffer_add_char(buf, c);
392                                 break;
393
394                         case 'u':
395                                 if(in_escape) {
396                                         (*index)++;
397
398                                         if(*index >= (current_strlen - 4)) {
399                                                 return json_handle_error(string, index,
400                                                         "json_parse_json_string(): truncated escaped unicode"); }
401
402                                         char buff[5];
403                                         memset(buff,0,5);
404                                         memcpy(buff, string + (*index), 4);
405
406
407                                         /* ------------------------------------------------------------------- */
408                                         /* ------------------------------------------------------------------- */
409                                         /* This was taken directly from json-c http://oss.metaparadigm.com/json-c/ */
410                                         unsigned char utf_out[3];
411                                         memset(utf_out,0,3);
412
413                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
414
415                                         unsigned int ucs_char =
416                                                 (hexdigit(string[*index] ) << 12) +
417                                                 (hexdigit(string[*index + 1]) << 8) +
418                                                 (hexdigit(string[*index + 2]) << 4) +
419                                                 hexdigit(string[*index + 3]);
420         
421                                         if (ucs_char < 0x80) {
422                                                 utf_out[0] = ucs_char;
423                                                 buffer_add(buf, utf_out);
424
425                                         } else if (ucs_char < 0x800) {
426                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
427                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
428                                                 buffer_add(buf, utf_out);
429
430                                         } else {
431                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
432                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
433                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
434                                                 buffer_add(buf, utf_out);
435                                         }
436                                         /* ------------------------------------------------------------------- */
437                                         /* ------------------------------------------------------------------- */
438
439
440                                         (*index) += 3;
441                                         in_escape = 0;
442
443                                 } else {
444
445                                         buffer_add_char(buf, c);
446                                 }
447
448                                 break;
449
450                         default:
451                                 buffer_add_char(buf, c);
452                 }
453
454                 (*index)++;
455                 if(done) break;
456         }
457
458         obj->set_string(obj, buf->buf);
459         buffer_free(buf);
460         return 0;
461 }
462
463
464 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
465         assert(string && index);
466         if(*index >= current_strlen)
467                 return;
468
469         if( eat_all ) { /* removes newlines, etc */
470                 while(string[*index] == ' '     || 
471                                 string[*index] == '\n'  ||
472                                 string[*index] == '\t') 
473                         (*index)++;
474         }
475
476         else    
477                 while(string[*index] == ' ') (*index)++;
478 }
479
480
481 /* index should be at the '*' character at the beginning of the comment.
482  * when done, index will point to the first character after the final /
483  */
484 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
485         assert(string && index && *index < current_strlen);
486
487         if(string[*index] != '*' && string[*index] != '/' )
488                 return json_handle_error(string, index, 
489                         "json_eat_comment(): invalid character after /");
490
491         /* chop out any // style comments */
492         if(string[*index] == '/') {
493                 (*index)++;
494                 char c = string[*index];
495                 while(*index < current_strlen) {
496                         (*index)++;
497                         if(c == '\n') 
498                                 return 0;
499                         c = string[*index];
500                 }
501                 return 0;
502         }
503
504         (*index)++;
505
506         int on_star                     = 0; /* true if we just saw a '*' character */
507
508         /* we're just past the '*' */
509         if(!parse_class) { /* we're not concerned with class hints */
510                 while(*index < current_strlen) {
511                         if(string[*index] == '/') {
512                                 if(on_star) {
513                                         (*index)++;
514                                         return 0;
515                                 }
516                         }
517
518                         if(string[*index] == '*') on_star = 1;
519                         else on_star = 0;
520
521                         (*index)++;
522                 }
523                 return 0;
524         }
525
526
527
528         growing_buffer* buf = buffer_init(64);
529
530         int first_dash          = 0;
531         int second_dash = 0;
532         int third_dash          = 0;
533         int fourth_dash = 0;
534
535         int in_hint                     = 0;
536         int done                                = 0;
537
538         /*--S hint--*/   /* <-- Hints  look like this */
539         /*--E hint--*/
540
541         while(*index < current_strlen) {
542                 char c = string[*index];
543
544                 switch(c) {
545
546                         case '-':
547                                 on_star = 0;
548                                 if(third_dash)                  fourth_dash = 1;
549                                 else if(in_hint)                third_dash      = 1;
550                                 else if(first_dash)     second_dash = 1;
551                                 else                                            first_dash = 1;
552                                 break;
553
554                         case 'S':
555                                 on_star = 0;
556                                 if(second_dash && !in_hint) {
557                                         (*index)++;
558                                         json_eat_ws(string, index, 1);
559                                         (*index)--; /* this will get incremented at the bottom of the loop */
560                                         in_hint = 1;
561                                         break;
562                                 }
563
564                         case 'E':
565                                 on_star = 0;
566                                 if(second_dash && !in_hint) {
567                                         (*index)++;
568                                         json_eat_ws(string, index, 1);
569                                         (*index)--; /* this will get incremented at the bottom of the loop */
570                                         in_hint = 1;
571                                         break;
572                                 }
573
574                         case '*':
575                                 on_star = 1;
576                                 break;
577
578                         case '/':
579                                 if(on_star) 
580                                         done = 1;
581                                 else
582                                 on_star = 0;
583                                 break;
584
585                         default:
586                                 on_star = 0;
587                                 if(in_hint)
588                                         buffer_add_char(buf, c);
589                 }
590
591                 (*index)++;
592                 if(done) break;
593         }
594
595         if( buf->n_used > 0 && buffer)
596                 *buffer = buffer_data(buf);
597
598         buffer_free(buf);
599         return 0;
600 }
601
/* Returns 1 when c is one of the decimal digits '0' through '9', 0 otherwise. */
int is_number(char c) {
	/* the C standard guarantees the digit characters are contiguous */
	return (c >= '0' && c <= '9') ? 1 : 0;
}
618
619 int json_handle_error(char* string, unsigned long* index, char* err_msg) {
620
621         char buf[60];
622         memset(buf, 0, 60);
623
624         if(*index > 30)
625                 strncpy( buf, string + (*index - 30), 59 );
626         else
627                 strncpy( buf, string, 59 );
628
629         fprintf(stderr, 
630                         "\nError parsing json string at charracter %c (code %d) and index %ld\nMsg:\t%s\nNear:\t%s\n\n", 
631                         string[*index], string[*index], *index, err_msg, buf );
632         return -1;
633 }
634
635