2 Copyright (C) 2009 Equinox Software Inc.
3 Scott McKellar <scott@esilibrary.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
18 @brief Push parser for JSON.
20 This parser parses JSON incrementally, without necessarily holding the entire JSON string
21 (or any representation thereof) in memory at once. It is therefore suitable for parsing large inputs that arrive in chunks.
24 A format such as JSON, with its arbitrarily nestable elements, cries out piteously for a
25 recursive descent parser to match the recursive structure of the format. Unfortunately,
26 recursive descent doesn't work for an incremental parser, because the boundaries of
27 incoming chunks don't respect syntactic boundaries.
29 This parser is based on a finite state automaton, using a structure to retain state across
30 chunks, and a stack to simulate recursion. The calling code designates a series of
31 callback functions to respond to various syntactic features as they are encountered.
38 #include "opensrf/osrf_json.h"
39 #include "opensrf/jsonpush.h"
41 /** Enumeration of states for a finite state automaton */
43 PP_BEGIN, // outside of any JSON
44 PP_STR, // inside a string literal
45 PP_SLASH, // found a backslash in a string literal
46 PP_UTF8, // collecting a UTF8 sequence
47 PP_NUM, // inside a numeric literal
48 PP_ARRAY_BEGIN, // started an array
49 PP_ARRAY_VALUE, // found an array element
50 PP_ARRAY_COMMA, // found a comma between array elements
51 PP_OBJ_BEGIN, // started a JSON object
52 PP_OBJ_KEY, // found a string for a key in an object
53 PP_OBJ_COLON, // found a colon after a key in an object
54 PP_OBJ_VALUE, // found a value for a key in an object
55 PP_OBJ_COMMA, // found a comma separating entries in an object
56 PP_TRUE, // true keyword
57 PP_FALSE, // false keyword
58 PP_NULL, // null keyword
59 PP_END, // reached the end of the JSON stream
60 PP_ERROR // encountered invalid JSON; can't continue
63 struct StateNodeStruct;
64 typedef struct StateNodeStruct StateNode;
67 @brief Represents a parser state at a given level of nesting.
69 The parser maintains a stack of StateNodes to simulate recursive descent.
71 struct StateNodeStruct {
72 StateNode* next; /**< For a linked list to implement the stack */
73 PPState state; /**< State to which we will return */
74 osrfStringArray* keylist; /**< List of key strings, if the level is for a JSON object */
78 @brief A collection of things the parser needs to remember about what it's doing.
80 This structure enables the parser to retain state from one chunk of input to the next.
// Everything the parser must remember between calls, so that the input may be
// delivered in arbitrary chunks.
82 struct JSONPushParserStruct {
83 JSONHandlerMap handlers; /**< Callback functions, copied from the caller's map at creation. */
84 void* blob; /**< To be passed back to callback functions. */
85 unsigned line; /**< Line number. */
86 unsigned pos; /**< Character position within line. */
87 PPState state; /**< For finite state automaton. */
88 char again; /**< If non-zero, re-read it as the next character. */
89 growing_buffer* buf; /**< For accumulating strings and numbers. */
90 StateNode* state_stack; /**< For simulating recursive descent. */
91 StateNode* free_states; /**< Free list of unused StateNodes. */
92 unsigned word_idx; /**< Index of the current character within a keyword,
93 such as "true", "false", or "null". */
94 unsigned int point_code; /**< Accumulates the code point of a backslash-u escape in a string. */
95 osrfStringArray* keylist; /**< Stores keys in current JSON object. */
98 // State handlers for the finite state automaton
99 static int do_begin( JSONPushParser* parser, char c );
100 static int do_str ( JSONPushParser* parser, char c );
101 static int do_slash( JSONPushParser* parser, char c );
102 static int do_utf8 ( JSONPushParser* parser, char c );
103 static int do_num ( JSONPushParser* parser, char c );
104 static int do_array_begin( JSONPushParser* parser, char c );
105 static int do_array_value( JSONPushParser* parser, char c );
106 static int do_array_comma( JSONPushParser* parser, char c );
107 static int do_obj_begin( JSONPushParser* parser, char c );
108 static int do_obj_key ( JSONPushParser* parser, char c );
109 static int do_obj_colon( JSONPushParser* parser, char c );
110 static int do_obj_value( JSONPushParser* parser, char c );
111 static int do_obj_comma( JSONPushParser* parser, char c );
112 static int do_true ( JSONPushParser* parser, char c );
113 static int do_false( JSONPushParser* parser, char c );
114 static int do_null ( JSONPushParser* parser, char c );
115 static int do_end( JSONPushParser* parser, char c );
117 static int found_keyword( JSONPushParser* parser, char c,
118 const char* keyword, unsigned maxlen );
119 static void push_pp_state( JSONPushParser* parser, PPState state );
120 static void pop_pp_state( JSONPushParser* parser );
121 static void check_pp_end( JSONPushParser* parser );
122 static void report_pp_error( JSONPushParser* parser, const char* msg, ... );
125 @brief Create a new JSONPushParser.
126 @param map Pointer to a JSONHandlerMap designating the callback functions to call.
127 @param blob An arbitrary pointer to be passed to the callback functions.
128 @return A pointer to the new parser.
130 The calling code can use the @a blob parameter to specify its own context for the callback functions.
133 The calling code is responsible for freeing the parser by calling jsonPushParserFree().
135 JSONPushParser* jsonNewPushParser( const JSONHandlerMap* map, void* blob )
140 JSONPushParser* parser = safe_malloc( sizeof( JSONPushParser ) );
141 parser->handlers = *map;
145 parser->state = PP_BEGIN;
146 parser->again = '\0';
147 parser->buf = buffer_init( 64 );
148 parser->state_stack = NULL;
149 parser->free_states = NULL;
150 parser->word_idx = 0;
151 parser->keylist = osrfNewStringArray( 8 );
156 @brief Restore a JSONPushParser to its original pristine state.
157 @param parser Pointer to the JSONPushParser to be reset.
159 This function makes it possible to reuse the same parser for multiple documents, e.g.
160 multiple input files, without having to destroy and recreate it. The expectation is
161 that it be called after jsonPush() returns.
163 void jsonPushParserReset( JSONPushParser* parser ) {
167 parser->state = PP_BEGIN;
172 @brief Restore a JSONPushParser to a starting state.
173 @param parser Pointer to the JSONPushParser to be resumed.
175 This function is similar to jsonPushParserReset(), with two exceptions:
176 - It only works if the parser is between JSON values. Otherwise it wouldn't be able
177 to continue sensibly.
178 - It doesn't reset the line number or position number used for error messages.
180 Purpose: make it possible to parse multiple JSON values in the same stream. The
181 expectation is that it be called by the callback function that responds to end-of-JSON.
183 void jsonPushParserResume( JSONPushParser* parser ) {
185 parser->state = PP_BEGIN;
190 @brief Tell the JSON push parser that there is no more input to parse.
191 @param parser Pointer to the parser.
192 @return 0 if successful, or 1 upon error.
194 A call to this function is comparable to an end-of-file marker. Without it, the parser
195 would be unable to recognize certain tokens at the very end of the last buffer, because
196 it wouldn't know that the token was finished.
198 For example: if the last byte is part of a number, the parser will not have reported the
199 numeric token because it was waiting to see if the next character was numeric.
201 Likewise, certain kinds of errors would be unrecognizable, such as a failure to complete
202 the current JSON expression.
// Signal end of input: flush any token that was still being accumulated (a number or a
// keyword), then report an error if the parser stopped in the middle of a JSON value.
204 int jsonPushParserFinish( JSONPushParser* parser ) {
207 // If we're currently accumulating a token, finish it
208 if( PP_NUM == parser->state ) {
209 const char* num_str = OSRF_BUFFER_C_STR( parser->buf );
// Report the number as-is when it is well formed; otherwise let jsonScrubNumber()
// try to repair it before giving up.
212 if( jsonIsNumeric( num_str ) ) {
213 if( parser->handlers.handleNumber )
214 rc = parser->handlers.handleNumber( parser->blob, num_str );
215 pop_pp_state( parser );
216 check_pp_end( parser );
217 } else { // Not numeric? Try to fix it
218 char* temp = jsonScrubNumber( num_str );
219 if( temp ) { // Fixed
220 if( parser->handlers.handleNumber )
221 rc = parser->handlers.handleNumber( parser->blob, temp );
// NOTE(review): the release of temp is not visible in this copy of the file --
// confirm that the scrubbed string is freed after the callback.
223 pop_pp_state( parser );
224 check_pp_end( parser );
225 } else { // Can't be fixed
226 report_pp_error( parser, "Invalid number: \"%s\"", num_str );
228 parser->state = PP_ERROR;
// word_idx is the index of the last keyword letter matched so far, so 3 means that
// all four letters of "true" have been seen.
231 } else if( PP_TRUE == parser->state ) {
232 if( 3 == parser->word_idx ) {
233 if( parser->handlers.handleBool )
234 rc = parser->handlers.handleBool( parser->blob, 1 );
236 report_pp_error( parser, "Keyword \"true\" is incomplete at end of input" );
// NOTE(review): this looks like leftover debugging output. It writes to stdout from
// library code, and %d is given an unsigned argument (%u would match). The "false"
// and "null" branches have no counterpart. Consider removing it.
237 printf( "Wordlen = %d\n", parser->word_idx );
239 parser->state = PP_ERROR;
// NOTE(review): the two calls below follow the assignment of PP_ERROR; the braces that
// would show whether they also run on the error path are missing from this copy.
// Confirm that they cannot overwrite the error state.
241 pop_pp_state( parser );
242 check_pp_end( parser );
// Index 4 is the final letter of "false".
243 } else if( PP_FALSE == parser->state ) {
244 if( 4 == parser->word_idx ) {
245 if( parser->handlers.handleBool )
246 rc = parser->handlers.handleBool( parser->blob, 0 );
248 report_pp_error( parser, "Keyword \"false\" is incomplete at end of input" );
250 parser->state = PP_ERROR;
252 pop_pp_state( parser );
253 check_pp_end( parser );
// Index 3 is the final letter of "null".
254 } else if( PP_NULL == parser->state ) {
255 if( 3 == parser->word_idx ) {
256 if( parser->handlers.handleNull )
257 rc = parser->handlers.handleNull( parser->blob );
259 report_pp_error( parser, "Keyword \"null\" is incomplete at end of input" );
261 parser->state = PP_ERROR;
263 pop_pp_state( parser );
264 check_pp_end( parser );
267 // At this point the state should be PP_END, or possibly PP_BEGIN if the JSON value is
268 // empty, or PP_ERROR if we already encountered an error. Anything else means that the
269 // JSON value is incomplete.
271 switch( parser->state ) {
273 parser->state = PP_END; // JSON value was empty
278 report_pp_error( parser, "String literal not closed" );
279 parser->state = PP_ERROR;
282 case PP_NUM : // not possible
284 case PP_ARRAY_BEGIN :
285 report_pp_error( parser, "Empty JSON array not closed" );
286 parser->state = PP_ERROR;
289 case PP_ARRAY_VALUE :
290 report_pp_error( parser, "JSON array begun but not closed" );
291 parser->state = PP_ERROR;
294 case PP_ARRAY_COMMA :
295 report_pp_error( parser, "JSON array not closed" );
296 parser->state = PP_ERROR;
300 report_pp_error( parser, "Empty JSON object not closed" );
301 parser->state = PP_ERROR;
305 report_pp_error( parser, "JSON object not continued after key" );
306 parser->state = PP_ERROR;
310 report_pp_error( parser, "JSON object not continued after colon" );
311 parser->state = PP_ERROR;
315 report_pp_error( parser, "JSON object begun but not closed" );
316 parser->state = PP_ERROR;
320 report_pp_error( parser, "JSON object not closed" );
321 parser->state = PP_ERROR;
// The token states below were already resolved by the if/else chain above.
324 case PP_TRUE : // not possible
325 case PP_FALSE : // not possible
326 case PP_NULL : // not possible
327 case PP_END : // okay
328 case PP_ERROR : // previous error, presumably already reported
336 @brief Incrementally parse a chunk of JSON.
337 @param parser Pointer to the JSONPushParser that will do the parsing.
338 @param str Pointer to a chunk of JSON, either all or part of a JSON stream.
339 @param length Length of the chunk of JSON.
340 @return 0 if successful, or 1 upon error.
342 Parse a fragment of JSON, possibly preceded or followed by one or more other chunks
343 in the same JSON stream. Respond to various syntactical features by calling the
344 corresponding callback functions that were designated when the parser was created.
// Feed one chunk of input to the parser, dispatching each character to the handler
// for the current state of the automaton.
346 int jsonPush( JSONPushParser* parser, const char* str, size_t length ) {
350 report_pp_error( parser, "JSON parser received a NULL parameter for input" );
352 } else if( PP_ERROR == parser->state ) {
353 report_pp_error( parser, "JSON parser cannot continue due to previous error" );
358 // Loop through the chunk
// NOTE(review): str[i] is evaluated before i < length, so when the chunk is not
// nul-terminated the byte just past the end is read before the bound is checked.
// Testing i < length first would avoid that. Also note that a nul byte inside the
// chunk ends the loop early.
360 while( str[i] && i < length && parser->state != PP_ERROR ) {
361 // branch on the current parser state
362 switch( parser->state ) {
364 rc = do_begin( parser, str[i] );
367 rc = do_str( parser, str[i] );
370 rc = do_slash( parser, str[i] );
373 rc = do_utf8( parser, str[i] );
376 rc = do_num( parser, str[i] );
378 case PP_ARRAY_BEGIN :
379 rc = do_array_begin( parser, str[i] );
381 case PP_ARRAY_VALUE :
382 rc = do_array_value( parser, str[i] );
384 case PP_ARRAY_COMMA :
385 rc = do_array_comma( parser, str[i] );
388 rc = do_obj_begin( parser, str[i] );
391 rc = do_obj_key( parser, str[i] );
394 rc = do_obj_colon( parser, str[i] );
397 rc = do_obj_value( parser, str[i] );
400 rc = do_obj_comma( parser, str[i] );
403 rc = do_true( parser, str[i] );
406 rc = do_false( parser, str[i] );
409 rc = do_null( parser, str[i] );
412 rc = do_end( parser, str[i] );
415 break; // stub for now; should be error
// A handler that ended a token on a lookahead character sets parser->again; the flag is
// cleared here so that the same character is dispatched once more in the new state.
419 else if( parser->again )
420 parser->again = '\0'; // reuse the current character
422 // Advance to the next character
// NOTE(review): the statements that advance the index and update line/pos are missing
// from this copy; if the index is incremented before this test, str[i] here may refer
// to the byte at str[length] -- confirm against the full source.
424 if( '\n' == str[i] ) {
433 parser->state = PP_ERROR;
438 // -------- Beginning of state handlers --------------------------
441 @brief Look for the beginning of a JSON value.
442 @param parser Pointer to the current JSONPushParser.
443 @param c The current input character.
444 @return 0 if successful, or 1 upon error.
446 After some optional leading white space, look for a value comprising the entire JSON string.
449 static int do_begin( JSONPushParser* parser, char c ) {
451 if( isspace( (unsigned char) c ) ) // skip white space
453 else if( '\"' == c ) { // Found a string
454 buffer_reset( parser->buf );
455 push_pp_state( parser, PP_END );
456 parser->state = PP_STR;
457 } else if( '[' == c ) { // Found an array
458 if( parser->handlers.handleBeginArray )
459 rc = parser->handlers.handleBeginArray( parser->blob );
460 push_pp_state( parser, PP_END );
461 parser->state = PP_ARRAY_BEGIN;
462 } else if( '{' == c ) { // Found an object
463 if( parser->handlers.handleBeginObj )
464 rc = parser->handlers.handleBeginObj( parser->blob );
465 push_pp_state( parser, PP_END );
466 parser->state = PP_OBJ_BEGIN;
467 } else if( 't' == c ) {
468 push_pp_state( parser, PP_END );
469 parser->word_idx = 0;
470 parser->state = PP_TRUE;
471 } else if( 'f' == c ) {
472 push_pp_state( parser, PP_END );
473 parser->word_idx = 0;
474 parser->state = PP_FALSE;
475 } else if( 'n' == c ) {
476 push_pp_state( parser, PP_END );
477 parser->word_idx = 0;
478 parser->state = PP_NULL;
479 } else if( isdigit( (unsigned char) c )
485 || 'E' == c ) { // Found a number
486 buffer_reset( parser->buf );
487 buffer_add_char( parser->buf, c );
488 push_pp_state( parser, PP_END );
489 parser->state = PP_NUM;
491 report_pp_error( parser, "Unexpected character \'%c\' at beginning of JSON string", c );
499 @brief Accumulate characters in a string literal.
500 @param parser Pointer to the current JSONPushParser.
501 @param c The current input character.
502 @return 0 if successful, or 1 upon error.
// Handle one character inside a string literal: finish the literal, start an escape
// sequence, reject an illegal character, or append the character to the buffer.
504 static int do_str ( JSONPushParser* parser, char c ) {
507 // Reached the end of the string. Report it either as a string
508 // or as a key, depending on the context.
509 pop_pp_state( parser );
510 if( PP_OBJ_KEY == parser->state ) { // Report as a key
511 const char* key = OSRF_BUFFER_C_STR( parser->buf );
// Reject a key that was already recorded in parser->keylist; otherwise record it
// and pass it to the key callback.
512 if( osrfStringArrayContains( parser->keylist, key ) ) {
513 report_pp_error( parser, "Duplicate key \"%s\" in JSON object", key );
516 osrfStringArrayAdd( parser->keylist, key );
517 if( parser->handlers.handleObjKey ) {
518 rc = parser->handlers.handleObjKey(
522 } else { // Report as a string
523 if( parser->handlers.handleString ) {
524 rc = parser->handlers.handleString(
525 parser->blob, OSRF_BUFFER_C_STR( parser->buf ) );
527 check_pp_end( parser );
529 } else if( '\\' == c ) {
530 parser->state = PP_SLASH; // Handle an escaped special character
// NOTE(review): in the "C" locale isprint() is false for every byte >= 0x80, so a
// literal containing raw multibyte UTF-8 (rather than backslash-u escapes) is rejected
// here unless the program has selected a suitable locale -- confirm that is intended.
// The iscntrl() test appears redundant there, since control characters are not printable.
531 } else if( iscntrl( (unsigned char) c ) || ! isprint( (unsigned char) c ) ) {
532 report_pp_error( parser, "Illegal character 0x%02X in string literal",
536 buffer_add_char( parser->buf, c );
543 @brief Look for an escaped special character.
544 @param parser Pointer to the current JSONPushParser.
545 @param c The current input character.
546 @return 0 if successful, or 1 upon error.
548 static int do_slash( JSONPushParser* parser, char c ) {
553 OSRF_BUFFER_ADD_CHAR( parser->buf, '\"' );
554 parser->state = PP_STR;
557 OSRF_BUFFER_ADD_CHAR( parser->buf, '\\' );
558 parser->state = PP_STR;
561 OSRF_BUFFER_ADD_CHAR( parser->buf, '/' );
562 parser->state = PP_STR;
565 OSRF_BUFFER_ADD_CHAR( parser->buf, '\b' );
566 parser->state = PP_STR;
569 OSRF_BUFFER_ADD_CHAR( parser->buf, '\f' );
570 parser->state = PP_STR;
573 OSRF_BUFFER_ADD_CHAR( parser->buf, '\n' );
574 parser->state = PP_STR;
577 OSRF_BUFFER_ADD_CHAR( parser->buf, '\r' );
578 parser->state = PP_STR;
581 OSRF_BUFFER_ADD_CHAR( parser->buf, '\t' );
582 parser->state = PP_STR;
585 parser->word_idx = 0;
586 parser->point_code = 0;
587 parser->state = PP_UTF8;
590 report_pp_error( parser,
591 "Unexpected character '%c' escaped by preceding backslash", c );
600 @brief Accumulate and convert hex digits into a multibyte UTF-8 character.
601 @param parser Pointer to the current JSONPushParser.
602 @param c The current input character (should be a hex digit).
603 @return 0 if successful, or 1 upon error.
605 Convert each character to the corresponding numeric value and incorporate it into a sum.
606 When all four characters have been accumulated, translate the result into a multibyte
607 UTF-8 character and append it to the buffer.
609 The algorithm for converting the input character into a numeric value assumes that
610 the characters [a-f] and [A-F] are contiguous in the execution character set, and that
611 the lower 4 bits for 'a' and 'A' are 0001. Those assumptions are true for ASCII and
612 EBCDIC, but there may be some character sets for which it is not true.
// Consume one hex digit of a four-digit escape; after the fourth digit, encode the
// accumulated code point as UTF-8 and append it to the string buffer.
614 static int do_utf8( JSONPushParser* parser, char c ) {
617 if( isxdigit( (unsigned char) c ) ) {
618 // Convert the numeric character to a hex value
// Decimal digits map to 0-9. For letters, the low three bits of 'a'..'f' and 'A'..'F'
// are 1..6 in ASCII, so adding 9 yields 10..15.
619 unsigned char hex = (c <= '9') ? c - '0' : (c & 7) + 9;
621 // Branch according to how many characters we have so far
622 switch( parser->word_idx ) {
// Each digit contributes four bits, most significant digit first.
624 parser->point_code += hex << 12;
628 parser->point_code += hex << 8;
632 parser->point_code += hex << 4;
636 // We have all four hex characters. Now finish the
637 // point code and translate it to a UTF-8 character.
638 unsigned int point_code = parser->point_code + hex;
// NOTE(review): ubuf is later passed to OSRF_BUFFER_ADD through a char* cast; the
// assignment of a terminating nul is not visible in this copy -- confirm it is present.
639 unsigned char ubuf[ 4 ];
// One byte for code points below 0x80, two below 0x800, otherwise three.
// NOTE(review): four hex digits cannot exceed 0xFFFF, and no visible branch combines
// a UTF-16 surrogate pair, so a character written as two consecutive escapes would be
// emitted as two separate three-byte sequences -- confirm whether callers rely on
// characters outside the Basic Multilingual Plane.
641 if (point_code < 0x80) {
642 ubuf[0] = point_code;
645 } else if (point_code < 0x800) {
646 ubuf[0] = 0xc0 | (point_code >> 6);
647 ubuf[1] = 0x80 | (point_code & 0x3f);
651 ubuf[0] = 0xe0 | (point_code >> 12);
652 ubuf[1] = 0x80 | ((point_code >> 6) & 0x3f);
653 ubuf[2] = 0x80 | (point_code & 0x3f);
658 // Append the UTF-8 sequence to the buffer
659 OSRF_BUFFER_ADD( parser->buf, (char*) ubuf );
660 parser->state = PP_STR;
662 report_pp_error( parser, "UTF-8 sequence codes for nul character" );
668 report_pp_error( parser, "Non-hex character '%c' found in UTF-8 sequence", c );
676 @brief Accumulate characters into a numeric literal.
677 @param parser Pointer to the current JSONPushParser.
678 @param c The current input character.
679 @return 0 if successful, or 1 upon error.
681 Once we see a character that doesn't belong in a numeric literal, we check to make sure
682 that the characters we accumulate are a well-formed number according to JSON rules. If
683 they aren't, we try to massage them into something valid (e.g. by removing a leading
684 plus sign, which official JSON doesn't allow).
686 static int do_num ( JSONPushParser* parser, char c ) {
689 if( isdigit( (unsigned char) c )
696 buffer_add_char( parser->buf, c );
698 const char* num_str = OSRF_BUFFER_C_STR( parser->buf );
701 if( jsonIsNumeric( num_str ) ) {
702 if( parser->handlers.handleNumber )
703 rc = parser->handlers.handleNumber( parser->blob, num_str );
705 pop_pp_state( parser );
706 check_pp_end( parser );
707 } else { // Not valid? Try to fix it
708 char* temp = jsonScrubNumber( num_str );
709 if( temp ) { // Fixed
710 if( parser->handlers.handleNumber )
711 rc = parser->handlers.handleNumber( parser->blob, temp );
714 pop_pp_state( parser );
715 check_pp_end( parser );
716 } else { // Can't be fixed
717 report_pp_error( parser, "Invalid number: \"%s\"", num_str );
726 @brief Look for the first element of a JSON array, or the end of the array.
727 @param parser Pointer to the current JSONPushParser.
728 @param c The current input character.
729 @return 0 if successful, or 1 upon error.
731 We have just entered a JSON array. We expect to see either a value or (in the case of
732 an empty array) a closing bracket. Anything else is an error.
734 static int do_array_begin( JSONPushParser* parser, char c ) {
736 if( isspace( (unsigned char) c ) ) // skip white space
738 else if( '\"' == c ) { // Found a string
739 buffer_reset( parser->buf );
740 push_pp_state( parser, PP_ARRAY_VALUE );
741 parser->state = PP_STR;
742 } else if( '[' == c ) { // Found a nested array
743 if( parser->handlers.handleBeginArray )
744 rc = parser->handlers.handleBeginArray( parser->blob );
745 push_pp_state( parser, PP_ARRAY_VALUE );
746 parser->state = PP_ARRAY_BEGIN;
747 } else if( '{' == c ) { // Found a nested object
748 if( parser->handlers.handleBeginObj )
749 rc = parser->handlers.handleBeginObj( parser->blob );
750 push_pp_state( parser, PP_ARRAY_VALUE );
751 parser->state = PP_OBJ_BEGIN;
752 } else if( ']' == c ) { // End of array
753 if( parser->handlers.handleEndArray )
754 rc = parser->handlers.handleEndArray( parser->blob );
755 pop_pp_state( parser );
756 check_pp_end( parser );
757 } else if( 't' == c ) {
758 push_pp_state( parser, PP_ARRAY_VALUE );
759 parser->word_idx = 0;
760 parser->state = PP_TRUE;
761 } else if( 'f' == c ) {
762 push_pp_state( parser, PP_ARRAY_VALUE );
763 parser->word_idx = 0;
764 parser->state = PP_FALSE;
765 } else if( 'n' == c ) {
766 push_pp_state( parser, PP_ARRAY_VALUE );
767 parser->word_idx = 0;
768 parser->state = PP_NULL;
769 } else if( isdigit( (unsigned char) c ) // Found a number
776 buffer_reset( parser->buf );
777 buffer_add_char( parser->buf, c );
778 push_pp_state( parser, PP_ARRAY_VALUE );
779 parser->state = PP_NUM;
781 report_pp_error( parser, "Unexpected character \'%c\' at beginning of array", c );
789 @brief Look for the comma after a value in an array, or the end of the array.
790 @param parser Pointer to the current JSONPushParser.
791 @param c The current input character.
792 @return 0 if successful, or 1 upon error.
794 We have just passed a value in a JSON array. We expect to see either a separating
795 comma or a right square bracket.
797 static int do_array_value( JSONPushParser* parser, char c ) {
799 if( isspace( (unsigned char) c ) ) // skip white space
801 else if( ',' == c ) { // Found a comma
802 parser->state = PP_ARRAY_COMMA;
803 } else if( ']' == c ) { // End of array
804 if( parser->handlers.handleEndArray )
805 rc = parser->handlers.handleEndArray( parser->blob );
806 pop_pp_state( parser );
807 check_pp_end( parser );
809 report_pp_error( parser,
810 "Unexpected character \'%c\' in array; expected comma or right bracket", c );
818 @brief Look for the next element of a JSON array, or the end of the array.
819 @param parser Pointer to the current JSONPushParser.
820 @param c The current input character.
821 @return 0 if successful, or 1 upon error.
823 We have just passed a separator comma within a JSON array. We expect to see a value.
824 Anything else is an error.
826 static int do_array_comma( JSONPushParser* parser, char c ) {
828 if( isspace( (unsigned char) c ) ) // skip white space
830 else if( '\"' == c ) { // Found a string
831 buffer_reset( parser->buf );
832 push_pp_state( parser, PP_ARRAY_VALUE );
833 parser->state = PP_STR;
834 } else if( '[' == c ) { // Found a nested array
835 if( parser->handlers.handleBeginArray )
836 rc = parser->handlers.handleBeginArray( parser->blob );
837 push_pp_state( parser, PP_ARRAY_VALUE );
838 parser->state = PP_ARRAY_BEGIN;
839 } else if( '{' == c ) { // Found a nested object
840 if( parser->handlers.handleBeginObj )
841 rc = parser->handlers.handleBeginObj( parser->blob );
842 push_pp_state( parser, PP_ARRAY_VALUE );
843 parser->state = PP_OBJ_BEGIN;
844 } else if( 't' == c ) {
845 push_pp_state( parser, PP_ARRAY_VALUE );
846 parser->word_idx = 0;
847 parser->state = PP_TRUE;
848 } else if( 'f' == c ) {
849 push_pp_state( parser, PP_ARRAY_VALUE );
850 parser->word_idx = 0;
851 parser->state = PP_FALSE;
852 } else if( 'n' == c ) {
853 push_pp_state( parser, PP_ARRAY_VALUE );
854 parser->word_idx = 0;
855 parser->state = PP_NULL;
856 } else if( isdigit( (unsigned char) c ) // Found a number
863 buffer_reset( parser->buf );
864 buffer_add_char( parser->buf, c );
865 push_pp_state( parser, PP_ARRAY_VALUE );
866 parser->state = PP_NUM;
868 report_pp_error( parser, "Expected array value; found \'%c\'", c );
876 @brief Look for the first entry of a JSON object, or the end of the object.
877 @param parser Pointer to the current JSONPushParser.
878 @param c The current input character.
879 @return 0 if successful, or 1 upon error.
881 We have just entered a JSON object. We expect to see a string literal (the key for the
882 first entry), or the end of the object. Anything else is an error.
884 static int do_obj_begin( JSONPushParser* parser, char c ) {
886 if( isspace( (unsigned char) c ) ) // skip white space
888 else if( '\"' == c ) { // Found a string
889 buffer_reset( parser->buf );
890 push_pp_state( parser, PP_OBJ_KEY );
891 parser->state = PP_STR;
892 } else if( '}' == c ) { // End of object
893 if( parser->handlers.handleEndObj )
894 rc = parser->handlers.handleEndObj( parser->blob );
895 pop_pp_state( parser );
896 check_pp_end( parser );
898 report_pp_error( parser, "Unexpected character \'%c\' at beginning of object", c );
906 @brief Look for a colon between the key and value of an entry in a JSON object.
907 @param parser Pointer to the current JSONPushParser.
908 @param c The current input character.
909 @return 0 if successful, or 1 upon error.
911 We have just found the key for an entry in a JSON object. We expect to see a colon next.
912 Anything else is an error.
914 static int do_obj_key ( JSONPushParser* parser, char c ) {
916 if( isspace( (unsigned char) c ) ) // skip white space
918 else if( ':' == c ) {
919 parser->state = PP_OBJ_COLON;
921 report_pp_error( parser, "Expected colon within JSON object; found \'%c\'", c );
929 @brief Look for a value in a JSON object.
930 @param parser Pointer to the current JSONPushParser.
931 @param c The current input character.
932 @return 0 if successful, or 1 upon error.
934 We have just found a colon after the key of an entry in a JSON object. We expect to see
935 the associated value next. Anything else is an error.
937 static int do_obj_colon( JSONPushParser* parser, char c ) {
939 if( isspace( (unsigned char) c ) ) // skip white space
941 else if( '\"' == c ) { // Found a string
942 buffer_reset( parser->buf );
943 push_pp_state( parser, PP_OBJ_VALUE );
944 parser->state = PP_STR;
945 } else if( '[' == c ) { // Found a nested array
946 if( parser->handlers.handleBeginArray )
947 rc = parser->handlers.handleBeginArray( parser->blob );
948 push_pp_state( parser, PP_OBJ_VALUE );
949 parser->state = PP_ARRAY_BEGIN;
950 } else if( '{' == c ) { // Found a nested object
951 if( parser->handlers.handleBeginObj )
952 rc = parser->handlers.handleBeginObj( parser->blob );
953 push_pp_state( parser, PP_OBJ_VALUE );
954 parser->state = PP_OBJ_BEGIN;
955 } else if( 't' == c ) {
956 push_pp_state( parser, PP_OBJ_VALUE );
957 parser->word_idx = 0;
958 parser->state = PP_TRUE;
959 } else if( 'f' == c ) {
960 push_pp_state( parser, PP_OBJ_VALUE );
961 parser->word_idx = 0;
962 parser->state = PP_FALSE;
963 } else if( 'n' == c ) {
964 push_pp_state( parser, PP_OBJ_VALUE );
965 parser->word_idx = 0;
966 parser->state = PP_NULL;
967 } else if( isdigit( (unsigned char) c ) // Found a number
974 buffer_reset( parser->buf );
975 buffer_add_char( parser->buf, c );
976 push_pp_state( parser, PP_OBJ_VALUE );
977 parser->state = PP_NUM;
979 report_pp_error( parser,
980 "Unexpected character \'%c\' after colon within JSON object", c );
988 @brief Look for a comma in a JSON object, or for the end of the object.
989 @param parser Pointer to the current JSONPushParser.
990 @param c The current input character.
991 @return 0 if successful, or 1 upon error.
993 We have just finished a key/value entry in a JSON object. We expect to see either a comma
994 or a right curly brace. Anything else is an error.
996 static int do_obj_value( JSONPushParser* parser, char c ) {
998 if( isspace( (unsigned char) c ) ) // skip white space
1000 else if( ',' == c ) {
1001 parser->state = PP_OBJ_COMMA;
1002 } else if( '}' == c ) {
1003 if( parser->handlers.handleEndObj )
1004 rc = parser->handlers.handleEndObj( parser->blob );
1005 pop_pp_state( parser );
1006 check_pp_end( parser );
1008 report_pp_error( parser, "Expected comma or '}' within JSON object; found \'%c\'", c );
1016 @brief Look for the next entry in a JSON object.
1017 @param parser Pointer to the current JSONPushParser.
1018 @param c The current input character.
1019 @return 0 if successful, or 1 upon error.
1021 We have just found a separator comma within a JSON object. We expect to find a string to
1022 serve as the key for the next entry. Anything else is an error.
1024 static int do_obj_comma( JSONPushParser* parser, char c ) {
1026 if( isspace( (unsigned char) c ) ) // skip white space
1028 else if( '\"' == c ) { // Found a string
1029 buffer_reset( parser->buf );
1030 push_pp_state( parser, PP_OBJ_KEY );
1031 parser->state = PP_STR;
1033 report_pp_error( parser, "Expected key string in a JSON object; found \'%c\'", c );
1041 @brief Accumulate characters of the keyword "true".
1042 @param parser Pointer to the current JSONPushParser.
1043 @param c The current input character.
1044 @return 0 if successful, or 1 upon error.
1046 There are several ways to recognize keywords. You can accumulate characters and then
1047 look at the whole thing; you can have a distinct parser state for each letter; etc.
1049 In this parser we have only three keywords to recognize, starting with three different
1050 letters; no other bare words are allowed. When we see the opening "t" we expect to
1051 see "rue" following it, and similarly for "false" and "null". We compare each letter
1052 to the letter we expect to see at that position, and complain if they don't match.
1054 static int do_true( JSONPushParser* parser, char c ) {
1056 switch ( found_keyword( parser, c, "true", 4 ) ) {
1058 rc = 1; // wrong character found (already reported)
1060 case 0 : // so far so good
1062 case 1 : // we have all the right characters
1063 if( parser->handlers.handleBool )
1064 rc = parser->handlers.handleBool( parser->blob, 1 );
1066 pop_pp_state( parser );
1067 check_pp_end( parser );
1075 @brief Accumulate characters of the keyword "false".
1076 @param parser Pointer to the current JSONPushParser.
1077 @param c The current input character.
1078 @return 0 if successful, or 1 upon error.
1080 See the discussion of do_true().
1082 static int do_false( JSONPushParser* parser, char c ) {
1084 switch ( found_keyword( parser, c, "false", 5 ) ) {
1086 rc = 1; // wrong character found (already reported)
1088 case 0 : // so far so good
1090 case 1 : // we have all the right characters
1091 if( parser->handlers.handleBool )
1092 rc = parser->handlers.handleBool( parser->blob, 0 );
1094 pop_pp_state( parser );
1095 check_pp_end( parser );
1103 @brief Accumulate characters of the keyword "null".
1104 @param parser Pointer to the current JSONPushParser.
1105 @param c The current input character.
1106 @return 0 if successful, or 1 upon error.
1108 See the discussion of do_true().
1110 static int do_null( JSONPushParser* parser, char c ) {
1112 switch ( found_keyword( parser, c, "null", 4 ) ) {
1114 rc = 1; // wrong character found (already reported)
1116 case 0 : // so far so good
1118 case 1 : // we have all the right characters
1119 if( parser->handlers.handleNull )
1120 rc = parser->handlers.handleNull( parser->blob );
1121 parser->again = c; // Revisit this character next time around
1122 pop_pp_state( parser );
1123 check_pp_end( parser );
1131 @brief Accumulate a character for a specified keyword
1132 @param parser Pointer to the current JSONPushParser
1133 @param c The current input character
1134 @param keyword The keyword we're looking for
1135 @param maxlen The length of the keyword (obviating strlen())
1136 @return 0 If @a c is the correct next letter in the keyword,
1137 or 1 if the keyword is finished correctly, or -1 upon error.
1139 Accumulate successive letters in a specified keyword. We don't actually store the
1140 letters anywhere; we just check to make sure they're the letters we expect.
1142 static int found_keyword( JSONPushParser* parser, char c,
1143 const char* keyword, unsigned maxlen ) {
1145 if( ++parser->word_idx >= maxlen ) {
1146 // We have all the characters; now check the one following. It had better be
1147 // either white space or punctuation.
1148 if( !isspace( (unsigned char) c ) && !ispunct( (unsigned char) c ) ) {
1149 report_pp_error( parser, "Unexpected character '%c' after \"true\" keyword", c );
1150 return -1; // bad character at end of keyword -- e.g. "trueY"
1153 } else if( keyword[ parser->word_idx ] == c ) {
1156 report_pp_error( parser, "Expected '%c' in keyword \"%s\"; found '%c'\n",
1157 keyword[ parser->word_idx ], keyword, c );
1164 @brief We have reached the end of the JSON string. There should be nothing but white space.
1165 @param parser Pointer to the current JSONPushParser.
1166 @param c The current input character.
1167 @return 0 if successful, or 1 upon error.
1170 static int do_end( JSONPushParser* parser, char c ) {
1172 if( isspace( (unsigned char) c ) ) // skip white space
1175 report_pp_error( parser,
1176 "Expected nothing but white space afer a JSON string; found \'%c\'", c );
1183 // -------- End of state handlers --------------------------
1186 @brief Push the current parser state onto a stack.
1187 @param parser Pointer to the current JSONPushParser.
1188 @param state The state to which we will return when we pop it off.
1190 We use a stack to simulate recursive descent. At every point where a recursive descent
1191 parser would descend, we push the a state onto the stack, i.e. the state we want to
1192 go when we come back. Where a recursive descent parser would return from the descent,
1193 we pop the previously stored state off the stack.
1195 Note that the state we push is not the current state, but some other state. We simulate
1196 a descent in order to parse some JSON value, and after parsing it, we need to be in some
1197 other state. So we push that future state onto the stack in advance.
1199 static void push_pp_state( JSONPushParser* parser, PPState state ) {
1200 // Allocate a StateNode -- from the free list if possible,
1201 // Or from the heap if necessary.
1203 if( parser->free_states ) {
1204 node = parser->free_states;
1205 parser->free_states = node->next;
1207 node = safe_malloc( sizeof( StateNode ) );
1208 node->keylist = osrfNewStringArray( 8 );
1211 // Now popuate it, and push it onto the stack.
1212 node->state = state;
1213 osrfStringArraySwap( parser->keylist, node->keylist );
1214 node->next = parser->state_stack;
1215 parser->state_stack = node;
1219 @brief Restore the previous state of the parser.
1220 @param parser Pointer to the current JSONPushParser.
1222 See also push_pp_state().
1224 static void pop_pp_state( JSONPushParser* parser ) {
1225 if( ! parser->state_stack ) {
1226 parser->state = PP_END; // shouldn't happen
1228 StateNode* node = parser->state_stack;
1229 parser->state_stack = node->next;
1230 node->next = parser->free_states;
1231 parser->free_states = node;
1232 // Transfer the contents of the popped node to the parser
1233 parser->state = node->state;
1234 osrfStringArraySwap( parser->keylist, node->keylist );
1235 osrfStringArrayClear( node->keylist );
1239 static void check_pp_end( JSONPushParser* parser ) {
1240 if( PP_END == parser->state && parser->handlers.handleEndJSON )
1241 parser->handlers.handleEndJSON( parser->blob );
1245 @brief Issue an error message from the parser.
1246 @param parser Pointer to the parser issuing the message
1247 @param msg A printf-style format string. Subsequent parameters, if any, will be
1248 expanded and inserted into the output message.
// Report a parse error: through the client's handleError callback when one is
// registered, and/or through the OpenSRF error log (see the NOTE below).
1250 static void report_pp_error( JSONPushParser* parser, const char* msg, ... ) {
// Presumably expands msg and the variadic arguments into VA_BUF, which is used
// below; the macro is defined outside this file -- confirm.
1251 VA_LIST_TO_STRING( msg );
// A registered error callback receives the client's blob, the formatted message,
// and the parser's current line and position.
1252 if( parser->handlers.handleError )
1253 parser->handlers.handleError( parser->blob, VA_BUF, parser->line, parser->pos );
// NOTE(review): line 1254 is not visible in this listing.  It is presumably an
// "else", making the log entry below a fallback used only when no callback is
// registered -- confirm against the real file before relying on either behavior.
1255 osrfLogError( OSRF_LOG_MARK, "JSON Error at line %u, position %u: %s",
1256 parser->line, parser->pos, VA_BUF );
1260 @brief Free a JSONPushParser and everything it owns.
1261 @param parser Pointer to the JSONPushParser to be freed.
1263 void jsonPushParserFree( JSONPushParser* parser ) {
1265 buffer_free( parser->buf );
1267 // Pop off all the StateNodes, and then free them
1268 while( parser->state_stack ) {
1269 pop_pp_state( parser );
1272 while( parser->free_states ) {
1273 StateNode* temp = parser->free_states->next;
1274 osrfStringArrayFree( parser->free_states->keylist );
1275 free( parser->free_states );
1276 parser->free_states = temp;
1278 osrfStringArrayFree( parser->keylist );