2 Copyright (C) 2009 Georgia Public Library Service
3 Scott McKellar <scott@esilibrary.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
20 #include <opensrf/osrf_json.h>
21 #include <opensrf/osrf_json_utils.h>
// Parser state; every parsing routine below receives it through a Parser*.
24 growing_buffer* str_buf; // scratch buffer for building strings; set up by get_string(), freed by jsonParseRaw()
25 size_t index; // offset in buff of the next character parser_nextc() will return
26 const char* buff; // client's buffer holding current chunk of input
29 // For building Unicode byte sequences
// get_utf8() writes the encoded bytes of one \uXXXX escape into buff (at most
// three in the code below); get_string() then hands buff to OSRF_BUFFER_ADD.
// NOTE(review): the fourth byte is presumably for a terminating nul -- confirm.
31 unsigned char buff[ 4 ];
// Forward declarations of the file-local parsing routines.

// Top-level driver: parses one JSON value and rejects trailing material.
34 static jsonObject* parse( Parser* parser );
// Dispatcher plus one routine per kind of JSON value. get_json_thing() picks
// the routine according to the first character of the value.
36 static jsonObject* get_json_thing( Parser* parser, char firstc );
37 static const char* get_string( Parser* parser );
38 static jsonObject* get_number( Parser* parser, char firstc );
39 static jsonObject* get_array( Parser* parser );
40 static jsonObject* get_hash( Parser* parser );
41 static jsonObject* get_null( Parser* parser );
42 static jsonObject* get_true( Parser* parser );
43 static jsonObject* get_false( Parser* parser );
// Decodes the four hex digits of a \u escape into unibuff.
44 static int get_utf8( Parser* parser, Unibuff* unibuff );
// Low-level character access and error reporting.
46 static char skip_white_space( Parser* parser );
47 static inline void parser_ungetc( Parser* parser );
48 static inline char parser_nextc( Parser* parser );
49 static void report_error( Parser* parser, char badchar, char* err );
51 /* ------------------------------------- */
53 // Parse a JSON string; expand classes; construct a jsonObject.
54 // Return NULL if the JSON string is invalid.
// Works in two stages: jsonParseRaw() builds the raw object tree, then
// jsonObjectDecodeClass() derives the class-expanded result from it.
55 jsonObject* jsonParse( const char* str ) {
// Stage 1: plain parse, no class handling.
59 jsonObject* obj = jsonParseRaw( str );
// Stage 2: class expansion; obj2 stays NULL unless it is assigned below.
61 jsonObject* obj2 = NULL;
63 obj2 = jsonObjectDecodeClass( obj );
// The raw tree is released here, after decoding.
// NOTE(review): this assumes the decoded object shares no storage with obj -- confirm.
65 jsonObjectFree( obj );
70 // Parse a JSON string with variable arguments; construct a jsonObject.
71 // Return NULL if the resulting JSON string is invalid.
// str is used as the format for the variable arguments (presumably
// printf-style; the expansion is done by the VA_LIST_TO_STRING macro).
// The expanded text goes to jsonParseRaw(), not jsonParse(), so this
// function itself does no class decoding.
72 jsonObject* jsonParseFmt( const char* str, ... ) {
// Expand str and its arguments; the result is then read back as VA_BUF.
75 VA_LIST_TO_STRING(str);
76 return jsonParseRaw( VA_BUF );
79 // Parse a JSON string; construct a jsonObject.
80 // Return NULL if the JSON string is invalid.
// No class decoding is done here; jsonParse() adds that step on top of
// this function.
81 jsonObject* jsonParseRaw( const char* s ) {
84 return NULL; // Nothing to parse
// The string-building buffer starts out absent; get_string() sets it up
// the first time a quoted string is encountered.
88 parser.str_buf = NULL;
// Run the parser over the input.
92 jsonObject* obj = parse( &parser );
// Release the string-building buffer now that parsing has finished.
94 buffer_free( parser.str_buf );
98 // Parse a text string into a jsonObject.
// Reads exactly one JSON value from parser->buff. Anything other than white
// space after that value is reported as an error and the value is freed.
99 static jsonObject* parse( Parser* parser ) {
// Defensive check: the parser must have an input buffer.
101 if( ! parser->buff ) {
102 osrfLogError( OSRF_LOG_MARK, "Internal error; no input buffer available" );
103 return NULL; // Should never happen
// Skip leading white space; the first significant character selects the
// kind of value to read.
106 jsonObject* obj = get_json_thing( parser, skip_white_space( parser ) );
// After a successful parse the next non-white-space character must be the
// terminating nul; any other character is trailing junk.
109 if( obj && (c = skip_white_space( parser )) ) {
110 report_error( parser, c, "Extra material follows JSON string" );
111 jsonObjectFree( obj );
118 // Get the next JSON node -- be it string, number, hash, or whatever.
119 // Return a pointer to it if successful, or NULL if not.
// firstc is the first character of the node, already consumed from the
// input by the caller. It alone decides which routine reads the rest.
120 static jsonObject* get_json_thing( Parser* parser, char firstc ) {
122 jsonObject* obj = NULL;
124 // Branch on the first character
125 if( '"' == firstc ) {
126 const char* str = get_string( parser );
// get_string() returns a pointer into the parser's reusable string buffer,
// so the new JSON_STRING object is given its own copy of the text.
128 obj = jsonNewObject( NULL );
129 obj->type = JSON_STRING;
130 obj->value.s = strdup( str );
132 } else if( '[' == firstc ) {
133 obj = get_array( parser );
134 } else if( '{' == firstc ) {
135 obj = get_hash( parser );
136 } else if( 'n' == firstc ) {
137 obj = get_null( parser );
138 } else if( 't' == firstc ) {
139 obj = get_true( parser );
140 } else if( 'f' == firstc ) {
141 obj = get_false( parser );
// A digit (among other leading characters) starts a number.
// The cast to unsigned char keeps isdigit() well defined for negative char values.
143 else if( isdigit( (unsigned char) firstc ) ||
149 obj = get_number( parser, firstc );
// A character that cannot start any JSON value.
151 report_error( parser, firstc, "Unexpected character" );
157 // Collect characters from the input stream into a character
158 // string, terminated by '"'. Return a char* if successful,
// or NULL if not (for example, on a bad \u escape).
//
// The opening quotation mark has already been consumed by the caller.
// The returned pointer refers to the contents of parser->str_buf, which is
// reset at the start of every call. Callers that need the text beyond the
// next call must copy it, as get_json_thing() and get_hash() do with strdup().
160 static const char* get_string( Parser* parser ) {
// Reuse the parser's string buffer if there is one; otherwise create it.
162 if( parser->str_buf )
163 buffer_reset( parser->str_buf );
165 parser->str_buf = buffer_init( 64 );
167 growing_buffer* gb = parser->str_buf;
169 // Collect the characters.
170 // This is a naive implementation so far.
171 // We need to worry about UTF-8.
173 char c = parser_nextc( parser );
// Input ended before the closing quotation mark was found.
177 report_error( parser, parser->buff[ parser->index - 1 ],
178 "Quoted string not terminated" );
// A backslash introduces an escape; the character after it selects which.
180 } else if( '\\' == c ) {
181 c = parser_nextc( parser );
183 case '"' : OSRF_BUFFER_ADD_CHAR( gb, '"' ); break;
184 case '\\' : OSRF_BUFFER_ADD_CHAR( gb, '\\' ); break;
185 case '/' : OSRF_BUFFER_ADD_CHAR( gb, '/' ); break;
186 case 'b' : OSRF_BUFFER_ADD_CHAR( gb, '\b' ); break;
187 case 'f' : OSRF_BUFFER_ADD_CHAR( gb, '\f' ); break;
188 case 'n' : OSRF_BUFFER_ADD_CHAR( gb, '\n' ); break;
189 case 'r' : OSRF_BUFFER_ADD_CHAR( gb, '\r' ); break;
190 case 't' : OSRF_BUFFER_ADD_CHAR( gb, '\t' ); break;
// \uXXXX escape: get_utf8() decodes it into unibuff and returns nonzero on failure.
193 if( get_utf8( parser, &unibuff ) ) {
194 return NULL; // bad UTF-8
195 } else if( unibuff.buff[0] ) {
196 OSRF_BUFFER_ADD( gb, (char*) unibuff.buff );
// A first byte of zero means the escape encoded a nul, which is rejected.
198 report_error( parser, 'u', "Unicode sequence encodes a nul byte" );
// Any other escaped character is taken literally.
203 default : OSRF_BUFFER_ADD_CHAR( gb, c ); break;
// Ordinary character: append it unchanged.
207 OSRF_BUFFER_ADD_CHAR( gb, c );
// Hand back the buffer's contents; the buffer still belongs to the parser.
210 return OSRF_BUFFER_C_STR( gb );
213 // We found what looks like the first character of a number.
214 // Collect all the eligible characters, and verify that they
215 // are numeric (possibly after some scrubbing). Return a
216 // pointer to a JSON_NUMBER if successful, or NULL if not.
// firstc is the already-consumed first character of the number.
217 static jsonObject* get_number( Parser* parser, char firstc ) {
// Accumulate the candidate text in a private buffer, starting with firstc.
219 growing_buffer* gb = buffer_init( 32 );
220 OSRF_BUFFER_ADD_CHAR( gb, firstc );
// Keep appending characters for as long as they are eligible.
225 c = parser_nextc( parser );
226 if( isdigit( (unsigned char) c ) ||
232 OSRF_BUFFER_ADD_CHAR( gb, c );
// The character that ended the number is pushed back for the caller to
// examine, unless it is white space.
234 if( ! isspace( (unsigned char) c ) )
235 parser_ungetc( parser );
// Take the accumulated text out of the buffer as a plain string.
240 char* s = buffer_release( gb );
// If the text is not numeric as it stands, try a scrubbed version of it.
241 if( ! jsonIsNumeric( s ) ) {
242 char* temp = jsonScrubNumber( s );
246 report_error( parser, parser->buff[ parser->index - 1 ],
247 "Invalid numeric format" );
// Build the JSON_NUMBER object that holds the result.
252 jsonObject* obj = jsonNewObject( NULL );
253 obj->type = JSON_NUMBER;
259 // We found a '['. Create a JSON_ARRAY with all its subordinates.
// The '[' has already been consumed. On any error the partly built array,
// including the elements already pushed onto it, is freed.
260 static jsonObject* get_array( Parser* parser ) {
262 jsonObject* array = jsonNewObjectType( JSON_ARRAY );
// c is the first significant character after the '['.
264 char c = skip_white_space( parser );
266 return array; // Empty array
// c is the first character of the next element.
269 jsonObject* obj = get_json_thing( parser, c );
271 jsonObjectFree( array );
272 return NULL; // Failed to get anything
275 // Add the entry to the array
276 jsonObjectPush( array, obj );
278 // Look for a comma or right bracket
279 c = skip_white_space( parser );
282 else if( c != ',' ) {
283 report_error( parser, c, "Expected comma or bracket in array; didn't find it\n" );
284 jsonObjectFree( array );
// After a comma, move on to the first character of the next element.
287 c = skip_white_space( parser );
293 // We found '{' Get a JSON_HASH, with all its subordinates.
// The '{' has already been consumed. Each entry is read as a quoted key,
// a colon, then a value. On any error the partly built hash is freed.
294 static jsonObject* get_hash( Parser* parser ) {
295 jsonObject* hash = jsonNewObjectType( JSON_HASH );
// c is the first significant character after the '{'.
297 char c = skip_white_space( parser );
299 return hash; // Empty hash
303 // Get the key string
305 report_error( parser, c,
306 "Expected quotation mark to begin hash key; didn't find it\n" );
307 jsonObjectFree( hash );
311 const char* key = get_string( parser );
313 jsonObjectFree( hash );
// key points into the parser's reusable string buffer, which the next
// get_string() call resets (and the value may itself be a string).
// Keep a private copy of the key.
// NOTE(review): confirm key_copy is released on every exit path and after
// jsonObjectSetKey().
316 char* key_copy = strdup( key );
// A key may appear only once in a hash.
318 if( jsonObjectGetKey( hash, key_copy ) ) {
319 report_error( parser, '"', "Duplicate key in JSON object" );
320 jsonObjectFree( hash );
// The next significant character is checked for the colon separator.
325 c = skip_white_space( parser );
327 report_error( parser, c,
328 "Expected colon after hash key; didn't find it\n" );
330 jsonObjectFree( hash );
334 // Get the associated value
335 jsonObject* obj = get_json_thing( parser, skip_white_space( parser ) );
338 jsonObjectFree( hash );
342 // Add a new entry to the hash
343 jsonObjectSetKey( hash, key_copy, obj );
346 // Look for comma or right brace
347 c = skip_white_space( parser );
350 else if( c != ',' ) {
351 report_error( parser, c,
352 "Expected comma or brace in hash, didn't find it" );
353 jsonObjectFree( hash );
// After a comma, move on to the first character of the next key.
356 c = skip_white_space( parser );
362 // We found an 'n'. Verify that the next three characters are "ull",
363 // and that there are no further characters in the token.
364 static jsonObject* get_null( Parser* parser ) {
// The || chain stops reading at the first character that does not match.
366 if( parser_nextc( parser ) != 'u' ||
367 parser_nextc( parser ) != 'l' ||
368 parser_nextc( parser ) != 'l' ) {
// Report the character most recently read, i.e. the one that did not match.
369 report_error( parser, parser->buff[ parser->index - 1 ],
370 "Expected \"ull\" to follow \"n\"; didn't find it" );
374 // Sneak a peek at the next character
375 // to make sure that it's kosher
376 char c = parser_nextc( parser );
// White space is simply consumed; anything else is put back for the caller.
377 if( ! isspace( (unsigned char) c ) )
378 parser_ungetc( parser );
// A letter or digit directly after the literal makes it some other, invalid token.
380 if( isalnum( (unsigned char) c ) ) {
381 report_error( parser, c,
382 "Found letter or number after \"null\"" );
386 // Everything's okay. Return the object made by jsonNewObject( NULL ),
// which represents the JSON null.
387 return jsonNewObject( NULL );
390 // We found a 't'. Verify that the next three characters are "rue",
391 // and that there are no further characters in the token.
392 static jsonObject* get_true( Parser* parser ) {
// The || chain stops reading at the first character that does not match.
394 if( parser_nextc( parser ) != 'r' ||
395 parser_nextc( parser ) != 'u' ||
396 parser_nextc( parser ) != 'e' ) {
// Report the character most recently read, i.e. the one that did not match.
397 report_error( parser, parser->buff[ parser->index - 1 ],
398 "Expected \"rue\" to follow \"t\"; didn't find it" );
402 // Sneak a peek at the next character
403 // to make sure that it's kosher
404 char c = parser_nextc( parser );
// White space is simply consumed; anything else is put back for the caller.
405 if( ! isspace( (unsigned char) c ) )
406 parser_ungetc( parser );
// A letter or digit directly after the literal makes it some other, invalid token.
408 if( isalnum( (unsigned char) c ) ) {
409 report_error( parser, c,
410 "Found letter or number after \"true\"" );
414 // Everything's okay. Return a JSON_BOOL holding a true value.
415 return jsonNewBoolObject( 1 );
418 // We found an 'f'. Verify that the next four characters are "alse",
419 // and that there are no further characters in the token.
420 static jsonObject* get_false( Parser* parser ) {
// The || chain stops reading at the first character that does not match.
422 if( parser_nextc( parser ) != 'a' ||
423 parser_nextc( parser ) != 'l' ||
424 parser_nextc( parser ) != 's' ||
425 parser_nextc( parser ) != 'e' ) {
// Report the character most recently read, i.e. the one that did not match.
426 report_error( parser, parser->buff[ parser->index - 1 ],
427 "Expected \"alse\" to follow \"f\"; didn't find it" );
431 // Sneak a peek at the next character
432 // to make sure that it's kosher
433 char c = parser_nextc( parser );
// White space is simply consumed; anything else is put back for the caller.
434 if( ! isspace( (unsigned char) c ) )
435 parser_ungetc( parser );
// A letter or digit directly after the literal makes it some other, invalid token.
437 if( isalnum( (unsigned char) c ) ) {
438 report_error( parser, c,
439 "Found letter or number after \"false\"" );
443 // Everything's okay. Return a JSON_BOOL holding a false value.
444 return jsonNewBoolObject( 0 );
447 // We found \u. Grab the next 4 characters, confirm that they are hex,
448 // and convert them to Unicode.
// The resulting code point is written to unibuff->buff as a one-, two- or
// three-byte sequence. A nonzero return means failure (get_string() treats
// it so); on the failure paths below unibuff->buff[ 0 ] is set to nul.
// NOTE(review): each \uXXXX escape is encoded on its own. Nothing here
// combines a surrogate pair (two consecutive escapes) into one code point.
449 static int get_utf8( Parser* parser, Unibuff* unibuff ) {
453 // Accumulate four characters into a buffer. Make sure that
454 // there are four of them, and that they're all hex.
455 for( i = 0; i < 4; ++i ) {
456 int c = parser_nextc( parser );
// The input ended in the middle of the escape.
458 report_error( parser, 'u', "Incomplete Unicode sequence" );
459 unibuff->buff[ 0 ] = '\0';
461 } else if( ! isxdigit( (unsigned char) c ) ) {
462 report_error( parser, c, "Non-hex byte found in Unicode sequence" );
463 unibuff->buff[ 0 ] = '\0';
470 /* The following code is adapted with permission from
471 * json-c http://oss.metaparadigm.com/json-c/
// hexdigit: value of one hex digit character. For a letter, the low three
// bits give 1..6 for a..f in either case, and adding 9 yields 10..15.
// The macro evaluates its argument more than once.
473 #define hexdigit(x) ( ((x) <= '9') ? (x) - '0' : ((x) & 7) + 9)
475 // Convert the hex sequence into a single integer
476 unsigned int ucs_char =
477 (hexdigit(ubuff[ 0 ]) << 12) +
478 (hexdigit(ubuff[ 1 ]) << 8) +
479 (hexdigit(ubuff[ 2 ]) << 4) +
480 hexdigit(ubuff[ 3 ]);
// Encode the code point directly into the caller's buffer.
482 unsigned char* utf_out = unibuff->buff;
// Below 0x80: a single byte holding the value itself.
484 if (ucs_char < 0x80) {
485 utf_out[0] = ucs_char;
// Below 0x800: two bytes, 110xxxxx 10xxxxxx.
488 } else if (ucs_char < 0x800) {
489 utf_out[0] = 0xc0 | (ucs_char >> 6);
490 utf_out[1] = 0x80 | (ucs_char & 0x3f);
// Otherwise (at most 0xFFFF from four hex digits): three bytes,
// 1110xxxx 10xxxxxx 10xxxxxx.
494 utf_out[0] = 0xe0 | (ucs_char >> 12);
495 utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
496 utf_out[2] = 0x80 | (ucs_char & 0x3f);
503 // Return the next non-whitespace character in the input stream.
// The returned character has been consumed. At the end of the input that
// character is the terminating nul, since isspace() is false for it.
504 static char skip_white_space( Parser* parser ) {
507 c = parser_nextc( parser );
// The cast to unsigned char keeps isspace() well defined for negative char values.
508 } while( isspace( (unsigned char) c ) );
513 // Put a character back into the input stream.
514 // It is the responsibility of the caller not to back up
515 // past the beginning of the input string.
// Callers use this after parser_nextc() has read a character that belongs
// to the next token, so that the next read returns it again.
516 static inline void parser_ungetc( Parser* parser ) {
520 // Get the next character. It is the responsibility of
521 // the caller not to read past the end of the input string.
// The index advances unconditionally, even when the character returned is
// the terminating nul. After the nul has been read, parser->index is one
// position beyond it.
522 static inline char parser_nextc( Parser* parser ) {
523 return parser->buff[ parser->index++ ];
526 // Report a syntax error through osrfLogError().
// parser  - supplies the input text and the current position
// badchar - the offending character; a nul is not displayed as such
// err     - description of the problem, appended to the log message
// The message also shows a fragment of the input around the error position.
527 static void report_error( Parser* parser, char badchar, char* err ) {
529 // Determine the beginning and ending points of a JSON
530 // fragment to display, from the vicinity of the error
532 const int max_margin = 15; // How many characters to show
533 // on either side of the error
// NOTE(review): parser->index is a size_t, so this subtraction is done in
// unsigned arithmetic before being converted to int -- confirm the result
// is handled correctly when index is smaller than max_margin.
534 int pre = parser->index - max_margin;
// NOTE(review): the literal 15 repeats the value of max_margin.
538 int post = parser->index + 15;
// NOTE(review): parser_nextc() advances the index even when it returns the
// terminating nul. If the error was found on that nul, parser->index already
// points one past it and this reads beyond the terminator -- confirm.
539 if( '\0' == parser->buff[ parser->index ] ) {
540 post = parser->index - 1;
// Do not let the fragment extend past the end of the input.
542 int remaining = strlen(parser->buff + parser->index);
543 if( remaining < max_margin )
544 post = parser->index + remaining;
547 // Copy the fragment into a buffer
549 int len = post - pre + 1; // length of fragment
551 memcpy( buf, parser->buff + pre, len );
554 // Replace newlines and tabs with spaces
557 if( '\n' == *p || '\t' == *p )
562 // Avoid trying to display a nul character
563 if( '\0' == badchar )
// NOTE(review): parser->index is a size_t but is formatted with %d.
// If osrfLogError follows printf conventions, this needs %zu or a cast.
567 osrfLogError( OSRF_LOG_MARK,
568 "*JSON Parser Error\n - char = %c\n "
569 "- index = %d\n - near => %s\n - %s",
570 badchar, parser->index, buf, err );