]> git.evergreen-ils.org Git - Evergreen.git/blob - OpenSRF/src/objson/json_parser.c
added license info and some additional comments
[Evergreen.git] / OpenSRF / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
19 /* keep a copy of the length of the current json string so we don't 
20  * have to calculate it in each function
21  */
22 int current_strlen; /* XXX need to move this into the function params for thread support */
23
24 object* json_parse_string(char* string) {
25
26         if(string == NULL) return NULL;
27
28         current_strlen = strlen(string);
29
30         if(current_strlen == 0) 
31                 return NULL;
32
33         object* obj = new_object(NULL);
34         unsigned long index = 0;
35
36         int status = _json_parse_string(string, &index, obj);
37         if(!status)
38                 return obj;
39
40         if(status == -2)
41                 return NULL;
42
43         return NULL;
44 }
45
46
47 int _json_parse_string(char* string, unsigned long* index, object* obj) {
48         assert(string && index && *index < current_strlen);
49
50         int status = 0; /* return code from parsing routines */
51         char* classname = NULL; /* object class hint */
52         json_eat_ws(string, index, 1); /* remove leading whitespace */
53
54         char c = string[*index];
55
56         /* remove any leading comments */
57         if( c == '/' ) { 
58
59                 while(1) {
60                         (*index)++; /* move to second comment char */
61                         status = json_eat_comment(string, index, &classname, 1);
62                         if(status) return status;
63
64                         json_eat_ws(string, index, 1);
65                         c = string[*index];
66                         if(c != '/')
67                                 break;
68                 }
69         }
70
71         json_eat_ws(string, index, 1); /* remove leading whitespace */
72
73         if(*index >= current_strlen)
74                 return -2;
75
76         switch(c) {
77                                 
78                 /* json string */
79                 case '"': 
80                         (*index)++;
81                         status = json_parse_json_string(string, index, obj);
82                         break;
83
84                 /* json array */
85                 case '[':
86                         (*index)++;
87                         status = json_parse_json_array(string, index, obj);                     
88                         break;
89
90                 /* json object */
91                 case '{':
92                         (*index)++;
93                         status = json_parse_json_object(string, index, obj);
94                         break;
95
96                 /* NULL */
97                 case 'n':
98                 case 'N':
99                         status = json_parse_json_null(string, index, obj);
100                         break;
101                         
102
103                 /* true, false */
104                 case 'f':
105                 case 'F':
106                 case 't':
107                 case 'T':
108                         status = json_parse_json_bool(string, index, obj);
109                         break;
110
111                 default:
112                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
113                                 status = json_parse_json_number(string, index, obj);    
114                                 if(status) return status;
115                                 break;
116                         }
117
118                         (*index)--;
119                         /* we should never get here */
120                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
121         }       
122
123         if(status) return status;
124
125         json_eat_ws(string, index, 1);
126
127         if( *index < current_strlen ) {
128                 /* remove any trailing comments */
129                 c = string[*index];
130                 if( c == '/' ) { 
131                         (*index)++;
132                         status = json_eat_comment(string, index, NULL, 0);
133                         if(status) return status;
134                 }
135         }
136
137         if(classname){
138                 obj->set_class(obj, classname);
139                 free(classname);
140         }
141
142         return 0;
143 }
144
145
146 int json_parse_json_null(char* string, unsigned long* index, object* obj) {
147
148         if(*index >= (current_strlen - 3)) {
149                 return json_handle_error(string, index, 
150                         "_parse_json_string(): invalid null" );
151         }
152
153         if(!strncasecmp(string + (*index), "null", 4)) {
154                 (*index) += 4;
155                 obj->is_null = 1;
156                 return 0;
157         } else {
158                 return json_handle_error(string, index,
159                         "_parse_json_string(): invalid null" );
160         }
161 }
162
163 /* should be at the first character of the bool at this point */
164 int json_parse_json_bool(char* string, unsigned long* index, object* obj) {
165         assert(string && obj && *index < current_strlen);
166
167         char* ret = "json_parse_json_bool(): truncated bool";
168
169         if( *index >= (current_strlen - 5))
170                 return json_handle_error(string, index, ret);
171         
172         if(!strncasecmp( string + (*index), "false", 5)) {
173                 (*index) += 5;
174                 return 0;
175         }
176
177         if( *index >= (current_strlen - 4))
178                 return json_handle_error(string, index, ret);
179
180         if(!strncasecmp( string + (*index), "true", 4)) {
181                 (*index) += 4;
182                 return 0;
183         }
184
185         return json_handle_error(string, index, ret);
186 }
187
188
189 /* expecting the first character of the number */
190 int json_parse_json_number(char* string, unsigned long* index, object* obj) {
191         assert(string && obj && *index < current_strlen);
192
193         growing_buffer* buf = buffer_init(64);
194         char c = string[*index];
195
196         int done = 0;
197         int dot_seen = 0;
198
199         /* negative number? */
200         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
201
202         while(*index < current_strlen) {
203
204                 if(is_number(c))
205                         buffer_add_char(buf, c);
206
207                 else if( c == '.' ) {
208                         if(dot_seen) {
209                                 return json_handle_error(string, index, 
210                                         "json_parse_json_number(): malformed json number");
211                         }
212                         dot_seen = 1;
213                 } else {
214                         done = 1; break;
215                 }
216                 (*index)++;
217                 c = string[*index];
218                 if(done) break;
219         }
220
221         if(dot_seen) {
222                 obj->is_double = 1;
223                 obj->double_value = strtod(buf->buf, NULL);
224                 buffer_free(buf);
225                 return 0;
226
227         } else {
228                 obj->is_number = 1;
229                 obj->num_value = atol(buf->buf);
230                 buffer_free(buf);
231                 return 0;
232         }
233 }
234
235 /* index should point to the character directly following the '['.  when done
236  * index will point to the character directly following the ']' character
237  */
238 int json_parse_json_array(char* string, unsigned long* index, object* obj) {
239         assert(string && obj && index && *index < current_strlen);
240
241         int status;
242         int in_parse = 0; /* true if this array already contains one item */
243         obj->is_array = 1;
244         while(*index < current_strlen) {
245
246                 json_eat_ws(string, index, 1);
247
248                 if(string[*index] == ']') {
249                         (*index)++;
250                         break;
251                 }
252
253                 if(in_parse) {
254                         json_eat_ws(string, index, 1);
255                         if(string[*index] != ',') {
256                                 return json_handle_error(string, index,
257                                         "json_parse_json_array(): array not followed by a ','");
258                         }
259                         (*index)++;
260                         json_eat_ws(string, index, 1);
261                 }
262
263                 object* item = new_object(NULL);
264                 status = _json_parse_string(string, index, item);
265
266                 if(status) return status;
267                 obj->push(obj, item);
268                 in_parse = 1;
269         }
270
271         return 0;
272 }
273
274
275 /* index should point to the character directly following the '{'.  when done
276  * index will point to the character directly following the '}'
277  */
278 int json_parse_json_object(char* string, unsigned long* index, object* obj) {
279         assert(string && obj && index && *index < current_strlen);
280
281         obj->is_hash = 1;
282         int status;
283         int in_parse = 0; /* true if we've already added one item to this object */
284
285         while(*index < current_strlen) {
286
287                 json_eat_ws(string, index, 1);
288
289                 if(string[*index] == '}') {
290                         (*index)++;
291                         break;
292                 }
293
294                 if(in_parse) {
295                         if(string[*index] != ',') {
296                                 return json_handle_error(string, index,
297                                         "json_parse_json_object(): object missing ',' betweenn elements" );
298                         }
299                         (*index)++;
300                         json_eat_ws(string, index, 1);
301                 }
302
303                 /* first we grab the hash key */
304                 object* key_obj = new_object(NULL);
305                 status = _json_parse_string(string, index, key_obj);
306                 if(status) return status;
307
308                 if(!key_obj->is_string) {
309                         return json_handle_error(string, index, 
310                                 "_json_parse_json_object(): hash key not a string");
311                 }
312
313                 char* key = key_obj->string_data;
314
315                 json_eat_ws(string, index, 1);
316
317                 if(string[*index] != ':') {
318                         return json_handle_error(string, index, 
319                                 "json_parse_json_object(): hash key not followed by ':' character");
320                 }
321
322                 (*index)++;
323
324                 /* now grab the value object */
325                 json_eat_ws(string, index, 1);
326                 object* value_obj = new_object(NULL);
327                 status = _json_parse_string(string, index, value_obj);
328                 if(status) return status;
329
330                 /* put the data into the object and continue */
331                 obj->add_key(obj, key, value_obj);
332                 free_object(key_obj);
333                 in_parse = 1;
334         }
335         return 0;
336 }
337
338
339
340 /* when done, index will point to the character after the closing quote */
341 int json_parse_json_string(char* string, unsigned long* index, object* obj) {
342         assert(string && index && *index < current_strlen);
343
344         int in_escape = 0;      
345         int done = 0;
346         growing_buffer* buf = buffer_init(64);
347
348         while(*index < current_strlen) {
349
350                 char c = string[*index]; 
351
352                 switch(c) {
353
354                         case '\\':
355                                 if(in_escape) {
356                                         buffer_add(buf, "\\");
357                                         in_escape = 0;
358                                 } else 
359                                         in_escape = 1;
360                                 break;
361
362                         case '"':
363                                 if(in_escape) {
364                                         buffer_add(buf, "\"");
365                                         in_escape = 0;
366                                 } else 
367                                         done = 1;
368                                 break;
369
370                         case 't':
371                                 if(in_escape) {
372                                         buffer_add(buf,"\t");
373                                         in_escape = 0;
374                                 } else 
375                                         buffer_add_char(buf, c);
376                                 break;
377
378                         case 'b':
379                                 if(in_escape) {
380                                         buffer_add(buf,"\b");
381                                         in_escape = 0;
382                                 } else 
383                                         buffer_add_char(buf, c);
384                                 break;
385
386                         case 'f':
387                                 if(in_escape) {
388                                         buffer_add(buf,"\f");
389                                         in_escape = 0;
390                                 } else 
391                                         buffer_add_char(buf, c);
392                                 break;
393
394                         case 'r':
395                                 if(in_escape) {
396                                         buffer_add(buf,"\r");
397                                         in_escape = 0;
398                                 } else 
399                                         buffer_add_char(buf, c);
400                                 break;
401
402                         case 'n':
403                                 if(in_escape) {
404                                         buffer_add(buf,"\n");
405                                         in_escape = 0;
406                                 } else 
407                                         buffer_add_char(buf, c);
408                                 break;
409
410                         case 'u':
411                                 if(in_escape) {
412                                         (*index)++;
413
414                                         if(*index >= (current_strlen - 4)) {
415                                                 return json_handle_error(string, index,
416                                                         "json_parse_json_string(): truncated escaped unicode"); }
417
418                                         char buff[5];
419                                         memset(buff,0,5);
420                                         memcpy(buff, string + (*index), 4);
421
422
423                                         /* ----------------------------------------------------------------------- */
424                                         /* ----------------------------------------------------------------------- */
425                                         /* The following chunk was borrowed with permission from 
426                                                 json-c http://oss.metaparadigm.com/json-c/ */
427                                         unsigned char utf_out[3];
428                                         memset(utf_out,0,3);
429
430                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
431
432                                         unsigned int ucs_char =
433                                                 (hexdigit(string[*index] ) << 12) +
434                                                 (hexdigit(string[*index + 1]) << 8) +
435                                                 (hexdigit(string[*index + 2]) << 4) +
436                                                 hexdigit(string[*index + 3]);
437         
438                                         if (ucs_char < 0x80) {
439                                                 utf_out[0] = ucs_char;
440                                                 buffer_add(buf, utf_out);
441
442                                         } else if (ucs_char < 0x800) {
443                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
444                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
445                                                 buffer_add(buf, utf_out);
446
447                                         } else {
448                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
449                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
450                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
451                                                 buffer_add(buf, utf_out);
452                                         }
453                                         /* ----------------------------------------------------------------------- */
454                                         /* ----------------------------------------------------------------------- */
455
456                                         (*index) += 3;
457                                         in_escape = 0;
458
459                                 } else {
460
461                                         buffer_add_char(buf, c);
462                                 }
463
464                                 break;
465
466                         default:
467                                 buffer_add_char(buf, c);
468                 }
469
470                 (*index)++;
471                 if(done) break;
472         }
473
474         obj->set_string(obj, buf->buf);
475         buffer_free(buf);
476         return 0;
477 }
478
479
480 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
481         assert(string && index);
482         if(*index >= current_strlen)
483                 return;
484
485         if( eat_all ) { /* removes newlines, etc */
486                 while(string[*index] == ' '     || 
487                                 string[*index] == '\n'  ||
488                                 string[*index] == '\t') 
489                         (*index)++;
490         }
491
492         else    
493                 while(string[*index] == ' ') (*index)++;
494 }
495
496
497 /* index should be at the '*' character at the beginning of the comment.
498  * when done, index will point to the first character after the final /
499  */
500 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
501         assert(string && index && *index < current_strlen);
502
503         if(string[*index] != '*' && string[*index] != '/' )
504                 return json_handle_error(string, index, 
505                         "json_eat_comment(): invalid character after /");
506
507         /* chop out any // style comments */
508         if(string[*index] == '/') {
509                 (*index)++;
510                 char c = string[*index];
511                 while(*index < current_strlen) {
512                         (*index)++;
513                         if(c == '\n') 
514                                 return 0;
515                         c = string[*index];
516                 }
517                 return 0;
518         }
519
520         (*index)++;
521
522         int on_star                     = 0; /* true if we just saw a '*' character */
523
524         /* we're just past the '*' */
525         if(!parse_class) { /* we're not concerned with class hints */
526                 while(*index < current_strlen) {
527                         if(string[*index] == '/') {
528                                 if(on_star) {
529                                         (*index)++;
530                                         return 0;
531                                 }
532                         }
533
534                         if(string[*index] == '*') on_star = 1;
535                         else on_star = 0;
536
537                         (*index)++;
538                 }
539                 return 0;
540         }
541
542
543
544         growing_buffer* buf = buffer_init(64);
545
546         int first_dash          = 0;
547         int second_dash = 0;
548         int third_dash          = 0;
549         int fourth_dash = 0;
550
551         int in_hint                     = 0;
552         int done                                = 0;
553
554         /*--S hint--*/   /* <-- Hints  look like this */
555         /*--E hint--*/
556
557         while(*index < current_strlen) {
558                 char c = string[*index];
559
560                 switch(c) {
561
562                         case '-':
563                                 on_star = 0;
564                                 if(third_dash)                  fourth_dash = 1;
565                                 else if(in_hint)                third_dash      = 1;
566                                 else if(first_dash)     second_dash = 1;
567                                 else                                            first_dash = 1;
568                                 break;
569
570                         case 'S':
571                                 on_star = 0;
572                                 if(second_dash && !in_hint) {
573                                         (*index)++;
574                                         json_eat_ws(string, index, 1);
575                                         (*index)--; /* this will get incremented at the bottom of the loop */
576                                         in_hint = 1;
577                                         break;
578                                 }
579
580                         case 'E':
581                                 on_star = 0;
582                                 if(second_dash && !in_hint) {
583                                         (*index)++;
584                                         json_eat_ws(string, index, 1);
585                                         (*index)--; /* this will get incremented at the bottom of the loop */
586                                         in_hint = 1;
587                                         break;
588                                 }
589
590                         case '*':
591                                 on_star = 1;
592                                 break;
593
594                         case '/':
595                                 if(on_star) 
596                                         done = 1;
597                                 else
598                                 on_star = 0;
599                                 break;
600
601                         default:
602                                 on_star = 0;
603                                 if(in_hint)
604                                         buffer_add_char(buf, c);
605                 }
606
607                 (*index)++;
608                 if(done) break;
609         }
610
611         if( buf->n_used > 0 && buffer)
612                 *buffer = buffer_data(buf);
613
614         buffer_free(buf);
615         return 0;
616 }
617
/* Returns 1 when c is one of the decimal digits '0' through '9' and 0 for
 * every other character.
 */
int is_number(char c) {
        /* the C standard guarantees that the ten digit characters are
         * consecutive, so a range test covers exactly '0'..'9' */
        if(c < '0' || c > '9')
                return 0;
        return 1;
}
634
/* Reports a parse error on stderr and returns the error status.
 *
 * string:  the JSON text being parsed
 * index:   position of the error; the character at that position, its
 *          numeric code and the position itself are printed
 * err_msg: description of the problem
 *
 * Up to 59 characters of context are printed as well: starting 30
 * characters before the error when *index is greater than 30, otherwise
 * starting at the beginning of the text.
 *
 * Always returns -1.  *index is not modified.
 */
int json_handle_error(char* string, unsigned long* index, char* err_msg) {

        /* where the context excerpt starts */
        const char* near = string;
        if(*index > 30)
                near = string + (*index - 30);

        /* %lu matches the unsigned long index; the %.59s precision limits
         * the excerpt to 59 characters without a scratch buffer */
        fprintf(stderr,
                        "\nError parsing json string at charracter %c "
                        "(code %d) and index %lu\nMsg:\t%s\nNear:\t%.59s\n\n",
                        string[*index], string[*index], *index, err_msg, near );
        return -1;
}
651
652