fixed a bug in jserver, added some debug lines
[Evergreen.git] / OpenSRF / src / objson / json_parser.c
1 /*
2 Copyright (C) 2005  Georgia Public Library Service 
3 Bill Erickson <highfalutin@gmail.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 */
15
16
17 #include "json_parser.h"
18
19 /* keep a copy of the length of the current json string so we don't 
20  * have to calculate it in each function
21  */
22 int current_strlen; /* XXX need to move this into the function params for thread support */
23
24 object* json_parse_string(char* string) {
25
26         if(string == NULL) return NULL;
27
28         current_strlen = strlen(string);
29
30         if(current_strlen == 0) 
31                 return NULL;
32
33         object* obj = new_object(NULL);
34         unsigned long index = 0;
35
36         int status = _json_parse_string(string, &index, obj);
37         if(!status)
38                 return obj;
39
40         if(status == -2)
41                 return NULL;
42
43         return NULL;
44 }
45
46
47 int _json_parse_string(char* string, unsigned long* index, object* obj) {
48         assert(string && index && *index < current_strlen);
49
50         int status = 0; /* return code from parsing routines */
51         char* classname = NULL; /* object class hint */
52         json_eat_ws(string, index, 1); /* remove leading whitespace */
53
54         char c = string[*index];
55
56         /* remove any leading comments */
57         if( c == '/' ) { 
58
59                 while(1) {
60                         (*index)++; /* move to second comment char */
61                         status = json_eat_comment(string, index, &classname, 1);
62                         if(status) return status;
63
64                         json_eat_ws(string, index, 1);
65                         c = string[*index];
66                         if(c != '/')
67                                 break;
68                 }
69         }
70
71         json_eat_ws(string, index, 1); /* remove leading whitespace */
72
73         if(*index >= current_strlen)
74                 return -2;
75
76         switch(c) {
77                                 
78                 /* json string */
79                 case '"': 
80                         (*index)++;
81                         status = json_parse_json_string(string, index, obj);
82                         break;
83
84                 /* json array */
85                 case '[':
86                         (*index)++;
87                         status = json_parse_json_array(string, index, obj);                     
88                         break;
89
90                 /* json object */
91                 case '{':
92                         (*index)++;
93                         status = json_parse_json_object(string, index, obj);
94                         break;
95
96                 /* NULL */
97                 case 'n':
98                 case 'N':
99                         status = json_parse_json_null(string, index, obj);
100                         break;
101                         
102
103                 /* true, false */
104                 case 'f':
105                 case 'F':
106                 case 't':
107                 case 'T':
108                         status = json_parse_json_bool(string, index, obj);
109                         break;
110
111                 default:
112                         if(is_number(c) || c == '.' || c == '-') { /* are we a number? */
113                                 status = json_parse_json_number(string, index, obj);    
114                                 if(status) return status;
115                                 break;
116                         }
117
118                         (*index)--;
119                         /* we should never get here */
120                         return json_handle_error(string, index, "_json_parse_string() final switch clause");
121         }       
122
123         if(status) return status;
124
125         json_eat_ws(string, index, 1);
126
127         if( *index < current_strlen ) {
128                 /* remove any trailing comments */
129                 c = string[*index];
130                 if( c == '/' ) { 
131                         (*index)++;
132                         status = json_eat_comment(string, index, NULL, 0);
133                         if(status) return status;
134                 }
135         }
136
137         if(classname){
138                 obj->set_class(obj, classname);
139                 free(classname);
140         }
141
142         return 0;
143 }
144
145
146 int json_parse_json_null(char* string, unsigned long* index, object* obj) {
147
148         if(*index >= (current_strlen - 3)) {
149                 return json_handle_error(string, index, 
150                         "_parse_json_string(): invalid null" );
151         }
152
153         if(!strncasecmp(string + (*index), "null", 4)) {
154                 (*index) += 4;
155                 obj->is_null = 1;
156                 return 0;
157         } else {
158                 return json_handle_error(string, index,
159                         "_parse_json_string(): invalid null" );
160         }
161 }
162
163 /* should be at the first character of the bool at this point */
164 int json_parse_json_bool(char* string, unsigned long* index, object* obj) {
165         assert(string && obj && *index < current_strlen);
166
167         char* ret = "json_parse_json_bool(): truncated bool";
168
169         if( *index >= (current_strlen - 5))
170                 return json_handle_error(string, index, ret);
171         
172         if(!strncasecmp( string + (*index), "false", 5)) {
173                 (*index) += 5;
174                 obj->bool_value = 0;
175                 obj->is_bool = 1;
176                 obj->is_null = 0;
177                 return 0;
178         }
179
180         if( *index >= (current_strlen - 4))
181                 return json_handle_error(string, index, ret);
182
183         if(!strncasecmp( string + (*index), "true", 4)) {
184                 (*index) += 4;
185                 obj->bool_value = 1;
186                 obj->is_bool = 1;
187                 obj->is_null = 0;
188                 return 0;
189         }
190
191         return json_handle_error(string, index, ret);
192 }
193
194
195 /* expecting the first character of the number */
196 int json_parse_json_number(char* string, unsigned long* index, object* obj) {
197         assert(string && obj && *index < current_strlen);
198
199         growing_buffer* buf = buffer_init(64);
200         char c = string[*index];
201
202         int done = 0;
203         int dot_seen = 0;
204
205         /* negative number? */
206         if(c == '-') { buffer_add(buf, "-"); (*index)++; }
207
208         while(*index < current_strlen) {
209
210                 if(is_number(c))
211                         buffer_add_char(buf, c);
212
213                 else if( c == '.' ) {
214                         if(dot_seen) {
215                                 return json_handle_error(string, index, 
216                                         "json_parse_json_number(): malformed json number");
217                         }
218                         dot_seen = 1;
219                         buffer_add_char(buf, c);
220                 } else {
221                         done = 1; break;
222                 }
223                 (*index)++;
224                 c = string[*index];
225                 if(done) break;
226         }
227
228         if(dot_seen) {
229                 obj->is_double = 1;
230                 obj->is_null = 0;
231                 obj->double_value = strtod(buf->buf, NULL);
232                 buffer_free(buf);
233                 return 0;
234
235         } else {
236                 obj->is_number = 1;
237                 obj->is_null = 0;
238                 obj->num_value = atol(buf->buf);
239                 buffer_free(buf);
240                 return 0;
241         }
242 }
243
244 /* index should point to the character directly following the '['.  when done
245  * index will point to the character directly following the ']' character
246  */
247 int json_parse_json_array(char* string, unsigned long* index, object* obj) {
248         assert(string && obj && index && *index < current_strlen);
249
250         int status;
251         int in_parse = 0; /* true if this array already contains one item */
252         obj->is_array = 1;
253         obj->is_null = 0;
254         while(*index < current_strlen) {
255
256                 json_eat_ws(string, index, 1);
257
258                 if(string[*index] == ']') {
259                         (*index)++;
260                         break;
261                 }
262
263                 if(in_parse) {
264                         json_eat_ws(string, index, 1);
265                         if(string[*index] != ',') {
266                                 return json_handle_error(string, index,
267                                         "json_parse_json_array(): array not followed by a ','");
268                         }
269                         (*index)++;
270                         json_eat_ws(string, index, 1);
271                 }
272
273                 object* item = new_object(NULL);
274                 status = _json_parse_string(string, index, item);
275
276                 if(status) return status;
277                 obj->push(obj, item);
278                 in_parse = 1;
279         }
280
281         return 0;
282 }
283
284
285 /* index should point to the character directly following the '{'.  when done
286  * index will point to the character directly following the '}'
287  */
288 int json_parse_json_object(char* string, unsigned long* index, object* obj) {
289         assert(string && obj && index && *index < current_strlen);
290
291         obj->is_hash = 1;
292         obj->is_null = 0;
293         int status;
294         int in_parse = 0; /* true if we've already added one item to this object */
295
296         while(*index < current_strlen) {
297
298                 json_eat_ws(string, index, 1);
299
300                 if(string[*index] == '}') {
301                         (*index)++;
302                         break;
303                 }
304
305                 if(in_parse) {
306                         if(string[*index] != ',') {
307                                 return json_handle_error(string, index,
308                                         "json_parse_json_object(): object missing ',' betweenn elements" );
309                         }
310                         (*index)++;
311                         json_eat_ws(string, index, 1);
312                 }
313
314                 /* first we grab the hash key */
315                 object* key_obj = new_object(NULL);
316                 status = _json_parse_string(string, index, key_obj);
317                 if(status) return status;
318
319                 if(!key_obj->is_string) {
320                         return json_handle_error(string, index, 
321                                 "_json_parse_json_object(): hash key not a string");
322                 }
323
324                 char* key = key_obj->string_data;
325
326                 json_eat_ws(string, index, 1);
327
328                 if(string[*index] != ':') {
329                         return json_handle_error(string, index, 
330                                 "json_parse_json_object(): hash key not followed by ':' character");
331                 }
332
333                 (*index)++;
334
335                 /* now grab the value object */
336                 json_eat_ws(string, index, 1);
337                 object* value_obj = new_object(NULL);
338                 status = _json_parse_string(string, index, value_obj);
339                 if(status) return status;
340
341                 /* put the data into the object and continue */
342                 obj->add_key(obj, key, value_obj);
343                 free_object(key_obj);
344                 in_parse = 1;
345         }
346
347         return 0;
348 }
349
350
351
352 /* when done, index will point to the character after the closing quote */
353 int json_parse_json_string(char* string, unsigned long* index, object* obj) {
354         assert(string && index && *index < current_strlen);
355
356         int in_escape = 0;      
357         int done = 0;
358         growing_buffer* buf = buffer_init(64);
359
360         while(*index < current_strlen) {
361
362                 char c = string[*index]; 
363
364                 switch(c) {
365
366                         case '\\':
367                                 if(in_escape) {
368                                         buffer_add(buf, "\\");
369                                         in_escape = 0;
370                                 } else 
371                                         in_escape = 1;
372                                 break;
373
374                         case '"':
375                                 if(in_escape) {
376                                         buffer_add(buf, "\"");
377                                         in_escape = 0;
378                                 } else 
379                                         done = 1;
380                                 break;
381
382                         case 't':
383                                 if(in_escape) {
384                                         buffer_add(buf,"\t");
385                                         in_escape = 0;
386                                 } else 
387                                         buffer_add_char(buf, c);
388                                 break;
389
390                         case 'b':
391                                 if(in_escape) {
392                                         buffer_add(buf,"\b");
393                                         in_escape = 0;
394                                 } else 
395                                         buffer_add_char(buf, c);
396                                 break;
397
398                         case 'f':
399                                 if(in_escape) {
400                                         buffer_add(buf,"\f");
401                                         in_escape = 0;
402                                 } else 
403                                         buffer_add_char(buf, c);
404                                 break;
405
406                         case 'r':
407                                 if(in_escape) {
408                                         buffer_add(buf,"\r");
409                                         in_escape = 0;
410                                 } else 
411                                         buffer_add_char(buf, c);
412                                 break;
413
414                         case 'n':
415                                 if(in_escape) {
416                                         buffer_add(buf,"\n");
417                                         in_escape = 0;
418                                 } else 
419                                         buffer_add_char(buf, c);
420                                 break;
421
422                         case 'u':
423                                 if(in_escape) {
424                                         (*index)++;
425
426                                         if(*index >= (current_strlen - 4)) {
427                                                 return json_handle_error(string, index,
428                                                         "json_parse_json_string(): truncated escaped unicode"); }
429
430                                         char buff[5];
431                                         memset(buff,0,5);
432                                         memcpy(buff, string + (*index), 4);
433
434
435                                         /* ----------------------------------------------------------------------- */
436                                         /* ----------------------------------------------------------------------- */
437                                         /* The following chunk was borrowed with permission from 
438                                                 json-c http://oss.metaparadigm.com/json-c/ */
439                                         unsigned char utf_out[3];
440                                         memset(utf_out,0,3);
441
442                                         #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
443
444                                         unsigned int ucs_char =
445                                                 (hexdigit(string[*index] ) << 12) +
446                                                 (hexdigit(string[*index + 1]) << 8) +
447                                                 (hexdigit(string[*index + 2]) << 4) +
448                                                 hexdigit(string[*index + 3]);
449         
450                                         if (ucs_char < 0x80) {
451                                                 utf_out[0] = ucs_char;
452                                                 buffer_add(buf, utf_out);
453
454                                         } else if (ucs_char < 0x800) {
455                                                 utf_out[0] = 0xc0 | (ucs_char >> 6);
456                                                 utf_out[1] = 0x80 | (ucs_char & 0x3f);
457                                                 buffer_add(buf, utf_out);
458
459                                         } else {
460                                                 utf_out[0] = 0xe0 | (ucs_char >> 12);
461                                                 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
462                                                 utf_out[2] = 0x80 | (ucs_char & 0x3f);
463                                                 buffer_add(buf, utf_out);
464                                         }
465                                         /* ----------------------------------------------------------------------- */
466                                         /* ----------------------------------------------------------------------- */
467
468                                         (*index) += 3;
469                                         in_escape = 0;
470
471                                 } else {
472
473                                         buffer_add_char(buf, c);
474                                 }
475
476                                 break;
477
478                         default:
479                                 buffer_add_char(buf, c);
480                 }
481
482                 (*index)++;
483                 if(done) break;
484         }
485
486         obj->set_string(obj, buf->buf);
487         buffer_free(buf);
488         return 0;
489 }
490
491
492 void json_eat_ws(char* string, unsigned long* index, int eat_all) {
493         assert(string && index);
494         if(*index >= current_strlen)
495                 return;
496
497         if( eat_all ) { /* removes newlines, etc */
498                 while(string[*index] == ' '     || 
499                                 string[*index] == '\n'  ||
500                                 string[*index] == '\t') 
501                         (*index)++;
502         }
503
504         else    
505                 while(string[*index] == ' ') (*index)++;
506 }
507
508
509 /* index should be at the '*' character at the beginning of the comment.
510  * when done, index will point to the first character after the final /
511  */
512 int json_eat_comment(char* string, unsigned long* index, char** buffer, int parse_class) {
513         assert(string && index && *index < current_strlen);
514
515         if(string[*index] != '*' && string[*index] != '/' )
516                 return json_handle_error(string, index, 
517                         "json_eat_comment(): invalid character after /");
518
519         /* chop out any // style comments */
520         if(string[*index] == '/') {
521                 (*index)++;
522                 char c = string[*index];
523                 while(*index < current_strlen) {
524                         (*index)++;
525                         if(c == '\n') 
526                                 return 0;
527                         c = string[*index];
528                 }
529                 return 0;
530         }
531
532         (*index)++;
533
534         int on_star                     = 0; /* true if we just saw a '*' character */
535
536         /* we're just past the '*' */
537         if(!parse_class) { /* we're not concerned with class hints */
538                 while(*index < current_strlen) {
539                         if(string[*index] == '/') {
540                                 if(on_star) {
541                                         (*index)++;
542                                         return 0;
543                                 }
544                         }
545
546                         if(string[*index] == '*') on_star = 1;
547                         else on_star = 0;
548
549                         (*index)++;
550                 }
551                 return 0;
552         }
553
554
555
556         growing_buffer* buf = buffer_init(64);
557
558         int first_dash          = 0;
559         int second_dash = 0;
560         int third_dash          = 0;
561         int fourth_dash = 0;
562
563         int in_hint                     = 0;
564         int done                                = 0;
565
566         /*--S hint--*/   /* <-- Hints  look like this */
567         /*--E hint--*/
568
569         while(*index < current_strlen) {
570                 char c = string[*index];
571
572                 switch(c) {
573
574                         case '-':
575                                 on_star = 0;
576                                 if(third_dash)                  fourth_dash = 1;
577                                 else if(in_hint)                third_dash      = 1;
578                                 else if(first_dash)     second_dash = 1;
579                                 else                                            first_dash = 1;
580                                 break;
581
582                         case 'S':
583                                 on_star = 0;
584                                 if(second_dash && !in_hint) {
585                                         (*index)++;
586                                         json_eat_ws(string, index, 1);
587                                         (*index)--; /* this will get incremented at the bottom of the loop */
588                                         in_hint = 1;
589                                         break;
590                                 }
591
592                         case 'E':
593                                 on_star = 0;
594                                 if(second_dash && !in_hint) {
595                                         (*index)++;
596                                         json_eat_ws(string, index, 1);
597                                         (*index)--; /* this will get incremented at the bottom of the loop */
598                                         in_hint = 1;
599                                         break;
600                                 }
601
602                         case '*':
603                                 on_star = 1;
604                                 break;
605
606                         case '/':
607                                 if(on_star) 
608                                         done = 1;
609                                 else
610                                 on_star = 0;
611                                 break;
612
613                         default:
614                                 on_star = 0;
615                                 if(in_hint)
616                                         buffer_add_char(buf, c);
617                 }
618
619                 (*index)++;
620                 if(done) break;
621         }
622
623         if( buf->n_used > 0 && buffer)
624                 *buffer = buffer_data(buf);
625
626         buffer_free(buf);
627         return 0;
628 }
629
/* Returns 1 if c is one of the decimal digits '0' through '9', 0 otherwise. */
int is_number(char c) {
	/* the C standard guarantees the digit characters are contiguous */
	return (c >= '0' && c <= '9') ? 1 : 0;
}
646
/* Reports a parse error on stderr and returns the error status.
 *
 * string:  the JSON text being parsed
 * index:   position of the error; read only, never modified
 * err_msg: description of the problem
 *
 * The report shows the character at *index, its numeric code, the index
 * itself, the message, and an excerpt of up to 59 characters of the input
 * that starts 30 characters before the error when *index is greater than 30
 * and at the beginning of the input otherwise.
 *
 * Always returns -1.
 */
int json_handle_error(char* string, unsigned long* index, char* err_msg) {

	char buf[60];
	memset(buf, 0, sizeof(buf));

	char* excerpt_start = string;
	if(*index > 30)
		excerpt_start = string + (*index - 30);

	/* copying at most sizeof(buf) - 1 bytes into the zeroed buffer keeps
	 * the excerpt NUL-terminated */
	strncpy( buf, excerpt_start, sizeof(buf) - 1 );

	/* *index is an unsigned long, so it is printed with %lu */
	fprintf(stderr,
			"\nError parsing json string at charracter %c "
			"(code %d) and index %lu\nMsg:\t%s\nNear:\t%s\n\n",
			string[*index], string[*index], *index, err_msg, buf );
	return -1;
}
663
664