2 Copyright (C) 2009 Equinox Software Inc.
3 Scott McKellar <scott@esilibrary.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
18 @brief Pretty-print JSON.
20 Read JSON from a file and output it to standard output with consistent indentation.
25 format_json [ filename [ ... ] ]
27 Each command-line argument is the name of a file that format_json will read in turn
28 and format as JSON. A single hyphen denotes standard input. If no file is specified,
29 format_json reads standard input.
31 The input file[s] may contain multiple JSON values, but a JSON value may not span more
32 than a single file. In the output, successive JSON values are separated by blank lines.
34 The primary purpose of this formatter is to translate JSON into a canonical format that
35 can be easily read and parsed by, for example, a perl script, without having to create
36 a full JSON parser. For that reason, every square bracket and curly brace is put on a
37 line by itself, although it might be more aesthetically pleasing to put it at the end of
38 the same line as whatever precedes it.
40 A secondary purpose is to make ugly, all-run-together JSON more readable to the human eye.
42 Finally, this program serves as an example of how to use the stream parser, especially
43 for files that are too big to be loaded into memory at once. To that end, the internal
44 logic is extensively commented.
46 Implementation details:
48 When using a stream parser it is almost always necessary to implement a finite state
49 automaton, and this formatter is no exception.
51 We define a collection of callback functions for the parser to call at various points.
52 We also set up a structure (called a Formatter) for the parser to pass back to the
53 callbacks via a void pointer. The Formatter supplies information about where we are and
54 what we're doing; in particular, it includes the state variable for our finite state automaton.
57 The parser is also a finite state automaton internally, and it also needs a struct (called
58 a JSONPushParser) to keep track of where it is and what it's doing. As a result, we have
59 two finite state automatons passing control back and forth. The parser handles syntax and
60 the Formatter handles semantics.
62 With a couple of exceptions, each callback returns a status code back to the parser that
63 calls it: 0 for success and non-zero for error. For example, a numeric literal might be
64 out of range, or an object key might be misspelled or out of place, or we might encounter
65 an object when we expect an array. Those rules reflect the semantics of the particular
66 kind of JSON that we're trying to parse. If a callback returns non-zero, the parser stops.
68 In the case of this formatter, any JSON is okay as long as the syntax is valid, and the
69 parser takes care of the syntax. Hence the callback functions routinely return zero.
75 #include "opensrf/utils.h"
76 #include "opensrf/osrf_utf8.h"
77 #include "opensrf/jsonpush.h"
80 @brief Enumeration of states for a finite state automaton.
83 CTX_OPEN, /**< Not currently within a JSON value. */
84 CTX_ARRAY_BEGIN, /**< At the beginning of a JSON array. */
85 CTX_ARRAY, /**< In a JSON array with at least one value so far. */
86 CTX_OBJ_BEGIN, /**< At the beginning of a JSON object. */
87 CTX_OBJ_KEY, /**< Between a key and its value in a JSON object. */
88 CTX_OBJ /**< In a JSON object with at least one entry so far. */
92 @brief Node for storing a Context in a stack.
95 struct ContextNode* next; /**< Linkage pointer for linked list. */
96 Context context; /**< The Context being stored for eventual restoration. */
98 typedef struct ContextNode ContextNode;
101 @brief Structure to be passed back to callback functions to keep track of where we are.
104 const char* filename; /**< Name of input file, or NULL for standard input */
105 Context context; /**< Current state. */
106 ContextNode* context_stack; /**< Stack of previous states. */
107 int indent; /**< How many current levels of indentation. */
108 growing_buffer* buf; /**< For formatting strings with escaped characters. */
109 JSONPushParser* parser; /**< Points to the current parser. */
112 static int format_file( Formatter* formatter, FILE* infile );
113 static void install_parser( Formatter* formatter );
115 static void indent( unsigned n );
116 static int formatString( void* blob, const char* str );
117 static int formatNumber( void* blob, const char* str );
118 static int formatLeftBracket( void* blob );
119 static int formatRightBracket( void* blob );
120 static int formatKey( void* blob, const char* str );
121 static int formatLeftBrace( void* blob );
122 static int formatRightBrace( void* blob );
123 static int formatBool( void* blob, int b );
124 static int formatNull( void* blob );
125 static void formatEnd( void* blob );
127 static void show_error( void* blob, const char* msg, unsigned line, unsigned pos );
129 static void push_context( Formatter* formatter );
130 static void pop_context( Formatter* formatter );
132 static ContextNode* free_context = NULL; // Free list for ContextNodes
136 @param argc Number of command line parameters, plus one.
137 @param argv Pointer to ragged array representing the command line.
138 @return EXIT_SUCCESS on success, or EXIT_FAILURE upon failure.
140 int main( int argc, char* argv[] ) {
142 int rc = EXIT_SUCCESS;
144 // Declare and initialize a Formatter
145 static Formatter formatter;
146 formatter.filename = NULL;
147 formatter.context = CTX_OPEN;
148 formatter.context_stack = NULL;
149 formatter.indent = 0;
150 formatter.buf = buffer_init( 32 );
151 install_parser( &formatter );
155 while( (++i < argc) && (0 == rc) ) {
156 // Iterate over the command line arguments.
157 // An argument "-" means to read standard input.
158 const char* filename = argv[ i ];
160 if( '-' == filename[ 0 ] && '\0' == filename[ 1 ] ) {
162 formatter.filename = NULL;
164 in = fopen( filename, "r" );
165 formatter.filename = filename;
169 fprintf( stderr, "Unable to open %s\n", filename );
171 // Reset the parser. This tells the parser that we're starting over for a new
172 // JSON value, and that it needs to reset the line counter and position counter
173 // for error messages. (We don't really need this for the first file, but it
175 jsonPushParserReset( formatter.parser );
178 if( format_file( &formatter, in ) )
180 if( formatter.filename )
185 // No command line arguments? Read standard input. Note that we don't have to
186 // reset the parser in this case, because we're only parsing once anyway.
187 format_file( &formatter, stdin );
190 // Clean up the formatter
191 jsonPushParserFree( formatter.parser );
192 buffer_free( formatter.buf );
193 while( formatter.context_stack )
194 pop_context( &formatter );
196 // Free the free ContextNodes shed from the stack
197 while( free_context ) {
198 ContextNode* temp = free_context->next;
199 free( free_context );
207 @brief Read and format a JSON file.
208 @param formatter Pointer to the current Formatter.
209 @param infile Pointer to the input file.
210 @return 0 if successful, or 1 upon error.
212 static int format_file( Formatter* formatter, FILE* infile ) {
214 const int bufsize = 4096;
220 num_read = fread( buf, 1, bufsize, infile );
222 if( jsonPush( formatter->parser, buf, num_read ) )
224 } while( num_read == bufsize && 0 == rc );
226 if( jsonPushParserFinish( formatter->parser ) )
230 fprintf( stderr, "\nError found in JSON file\n" );
236 @brief Create a JSONPushParser and install it in a Formatter.
237 @param formatter Pointer to the Formatter in which the parser is to be installed.
239 First we create a JSONHandlerMap to tell the parser what callback functions to call
240 at various points. Then we pass it to jsonNewPushParser, which makes its own copy of
241 the map, so it's okay for our original map to go out of scope.
243 static void install_parser( Formatter* formatter ) {
245 // Designate the callback functions to be installed in the parser.
246 JSONHandlerMap map = {
247 formatString, // string
248 formatNumber, // number
249 formatLeftBracket, // begin array
250 formatRightBracket, // end array
251 formatLeftBrace, // begin object
252 formatKey, // object key
253 formatRightBrace, // end object
254 formatBool, // keyword true or false
255 formatNull, // keyword null
256 formatEnd, // end of JSON
257 show_error // error handler
260 formatter->parser = jsonNewPushParser( &map, formatter );
264 @brief Format a string literal.
265 @param blob Pointer to Formatter, cast to a void pointer.
266 @param str Pointer to the contents of the string, with all escape sequences decoded.
269 Called by the parser when it finds a string literal (other than the name portion of a
270 name/value pair in a JSON object).
272 Write the literal within double quotes, with special and multibyte characters escaped
273 as needed, and a comma and white space as needed.
275 static int formatString( void* blob, const char* str ) {
276 Formatter* formatter = (Formatter*) blob;
277 if( CTX_ARRAY == formatter->context )
279 else if( formatter->context != CTX_OBJ_KEY )
282 if( formatter->context != CTX_OBJ_KEY )
283 indent( formatter->indent );
285 // Escape characters as needed
286 buffer_reset( formatter->buf );
287 buffer_append_utf8( formatter->buf, str );
289 printf( "\"%s\"", OSRF_BUFFER_C_STR( formatter->buf ) );
291 // Pick the next state
292 if( CTX_ARRAY_BEGIN == formatter->context )
293 formatter->context = CTX_ARRAY;
294 else if ( CTX_OBJ_KEY == formatter->context )
295 formatter->context = CTX_OBJ;
301 @brief Format a numeric literal.
302 @param blob Pointer to Formatter, cast to a void pointer.
303 @param str Pointer to a string containing the numeric literal.
306 Called by the parser when it finds a numeric literal.
308 Write the numeric literal, with a comma and white space as needed.
310 static int formatNumber( void* blob, const char* str ) {
311 Formatter* formatter = (Formatter*) blob;
312 if( CTX_ARRAY == formatter->context )
314 else if( formatter->context != CTX_OBJ_KEY )
317 if( formatter->context != CTX_OBJ_KEY )
318 indent( formatter->indent );
322 // Pick the next state
323 if( CTX_ARRAY_BEGIN == formatter->context )
324 formatter->context = CTX_ARRAY;
325 else if ( CTX_OBJ_KEY == formatter->context )
326 formatter->context = CTX_OBJ;
332 @brief Format a left square bracket.
333 @param blob Pointer to Formatter, cast to a void pointer.
336 Called by the parser when it finds a left square bracket opening a JSON array.
338 Write a left square bracket, with a comma and white space as needed.
340 static int formatLeftBracket( void* blob ) {
341 Formatter* formatter = blob;
342 if( CTX_ARRAY == formatter->context || CTX_OBJ == formatter->context )
345 indent( formatter->indent++ );
348 // Pick the state to return to when we close the array.
349 if( CTX_ARRAY_BEGIN == formatter->context )
350 formatter->context = CTX_ARRAY;
351 else if ( CTX_OBJ_BEGIN == formatter->context )
352 formatter->context = CTX_OBJ;
353 push_context( formatter );
355 formatter->context = CTX_ARRAY_BEGIN;
360 @brief Format a right square bracket.
361 @param blob Pointer to Formatter, cast to a void pointer.
364 Called by the parser when it finds a right square bracket closing a JSON array.
366 Write a newline, indentation, and a right square bracket.
368 static int formatRightBracket( void* blob ) {
369 Formatter* formatter = blob;
371 indent( --formatter->indent );
374 pop_context( formatter );
379 @brief Format a left curly brace.
380 @param blob Pointer to Formatter, cast to a void pointer.
383 Called by the parser when it finds a left curly brace opening a JSON object.
385 Write a left curly brace, with a comma and white space as needed.
387 static int formatLeftBrace( void* blob ) {
388 Formatter* formatter = blob;
389 if( CTX_ARRAY == formatter->context || CTX_OBJ == formatter->context )
392 indent( formatter->indent++ );
395 // Pick the state to return to when we close the object.
396 if( CTX_ARRAY_BEGIN == formatter->context )
397 formatter->context = CTX_ARRAY;
398 else if ( CTX_OBJ_BEGIN == formatter->context )
399 formatter->context = CTX_OBJ;
400 push_context( formatter );
402 formatter->context = CTX_OBJ_BEGIN;
407 @brief Format a right curly brace.
408 @param blob Pointer to Formatter, cast to a void pointer.
411 Called by the parser when it finds a right curly brace closing a JSON object.
413 Write a newline, indentation, and a right curly brace.
415 static int formatRightBrace( void* blob ) {
416 Formatter* formatter = blob;
418 indent( --formatter->indent );
421 pop_context( formatter );
426 @brief Format the key of a key/value pair in a JSON object.
427 @param blob Pointer to Formatter, cast to a void pointer.
428 @param str Pointer to a string containing the key.
431 Called by the parser when it finds the key of a key/value pair. It hasn't found the
432 accompanying colon yet, and if it doesn't find it later, it will return an error.
434 Write the key in double quotes, with a comma and white space as needed.
436 static int formatKey( void* blob, const char* str ) {
437 Formatter* formatter = blob;
438 if( CTX_OBJ == formatter->context )
442 indent( formatter->indent );
444 // Escape characters as needed
445 buffer_reset( formatter->buf );
446 buffer_append_utf8( formatter->buf, str );
448 printf( "\"%s\" : ", OSRF_BUFFER_C_STR( formatter->buf ) );
450 formatter->context = CTX_OBJ_KEY;
455 @brief Format a boolean value.
456 @param blob Pointer to Formatter, cast to a void pointer.
457 @param b An int used as a boolean to indicate whether the boolean value is true or false.
460 Called by the parser when it finds the JSON keyword "true" or "false".
462 Write "true" or "false" (without the quotes) with a comma and white space as needed.
464 static int formatBool( void* blob, int b ) {
465 Formatter* formatter = (Formatter*) blob;
466 if( CTX_ARRAY == formatter->context )
468 else if( formatter->context != CTX_OBJ_KEY )
471 if( formatter->context != CTX_OBJ_KEY )
472 indent( formatter->indent );
474 printf( "%s", b ? "true" : "false" );
476 // Pick the next state.
477 if( CTX_ARRAY_BEGIN == formatter->context )
478 formatter->context = CTX_ARRAY;
479 else if ( CTX_OBJ_KEY == formatter->context )
480 formatter->context = CTX_OBJ;
486 @brief Format a null value.
487 @param blob Pointer to Formatter, cast to a void pointer.
490 Called by the parser when it finds the JSON keyword "null".
492 Write "null" (without the quotes) with a comma and white space as needed.
494 static int formatNull( void* blob ) {
495 Formatter* formatter = (Formatter*) blob;
496 if( CTX_ARRAY == formatter->context )
498 else if( formatter->context != CTX_OBJ_KEY )
501 if( formatter->context != CTX_OBJ_KEY )
502 indent( formatter->indent );
506 if( CTX_ARRAY_BEGIN == formatter->context )
507 formatter->context = CTX_ARRAY;
508 else if ( CTX_OBJ_KEY == formatter->context )
509 formatter->context = CTX_OBJ;
515 @brief Respond to the end of a JSON value.
516 @param blob Pointer to Formatter, cast to a void pointer.
518 Called by the parser when it reaches the end of a JSON value.
520 This formatter accepts multiple JSON values in succession. Tell the parser to look
521 for another one. Otherwise the parser will treat anything other than white space
522 beyond this point as an error.
524 Note that jsonPushParserResume() does @em not reset the line number and column number
525 used by the parser for error messages. If you want to do that, call jsonPushParserReset().
527 static void formatEnd( void* blob ) {
528 Formatter* formatter = blob;
529 jsonPushParserResume( formatter->parser );
534 @brief Issue an error message about a syntax error detected by the parser.
536 @param msg Pointer to a message describing the syntax error.
537 @param line Line number in the current file where the error was detected.
538 @param pos Column position in the current line where the error was detected.
540 Called by the parser when it encounters a syntax error.
542 Write the message to standard error, providing the file name (saved in the Formatter),
543 line number, and column position.
545 static void show_error( void* blob, const char* msg, unsigned line, unsigned pos ) {
546 Formatter* formatter = (Formatter*) blob;
547 const char* filename = formatter->filename;
549 filename = "standard input";
550 fprintf( stderr, "\nError in %s at line %u, position %u:\n%s\n",
551 filename, line, pos, msg );
555 @brief Write a specified number of indents, four spaces per indent.
556 @param n How many indents to write.
558 static void indent( unsigned n ) {
566 @brief Push the current state onto the stack.
567 @param formatter Pointer to the current Formatter.
569 We call this when we enter a JSON array or object. Later, when we reach the end of the
570 array or object, we'll call pop_context() to restore the saved state.
572 static void push_context( Formatter* formatter ) {
573 // Allocate a ContextNode; from the free list if possible,
574 // or from the heap if necessary
575 ContextNode* node = NULL;
578 free_context = free_context->next;
580 node = safe_malloc( sizeof( ContextNode ) );
582 node->context = formatter->context;
583 node->next = formatter->context_stack;
584 formatter->context_stack = node;
588 @brief Pop a state off the stack.
589 @param formatter Pointer to the current Formatter.
591 We call this at the end of a JSON array or object, in order to restore the state saved
592 when we entered the array or object.
594 static void pop_context( Formatter* formatter ) {
595 if( !formatter->context_stack )
596 return; // shouldn't happen
598 ContextNode* node = formatter->context_stack;
599 formatter->context_stack = node->next;
601 formatter->context = node->context;
603 node->next = free_context;