From 2f83f103b758a32d6213ec0b84918438f0bc7524 Mon Sep 17 00:00:00 2001 From: miker Date: Sat, 5 Jan 2008 19:07:02 +0000 Subject: [PATCH] Patch from Scott McKellar: These patches are the culmination of several postings on this subject. The overall effect is to store numbers in jsonObjects as strings, rather than as doubles, in order to avoid needless loss of precision in translating back and forth between text and floating point representations. I shall not repeat the details outlined in previous posts, but rather focus on what's new: 1. A new extern function jsonNewNumberStringObject constructs a JSON_NUMBER from a character string. If the string is not numeric according to JSON rules, the function returns NULL. 2. A new extern function jsonScrubNumber accepts a character string and reformats it, if possible, into a numeric string that is valid according to JSON rules. For example, it transforms " +00.42" into "0.42". The transformed string is returned as a char* that the caller is responsible for freeing. jsonScrubNumber performs this transformation by manipulating text, not by passing the value through a double. Therefore it can handle numbers that would be too long, too large, or too small for strtod() and its kindred to handle. It accepts leading white space and scientific notation, but not trailing white space, hex, or octal. If the input string is not numeric, jsonScrubNumber returns NULL. 3. The doubleToString function now translates the incoming double to a character string with up to 30 decimal digits of precision. That should be enough to minimize the impact on existing code, depending of course on how faithfully snprintf() does the formatting. 4. In osrf_json.h: I changed the signature of the next-to-last function pointer in a jsonParserHandler, so that it accepts a character pointer instead of a double. Likewise for the corresponding declaration of _jsonHandleNumber in osrf_json_utils.h. 5. 
In osrf_json_parser.c: I construct a JSON_NUMBER from the input character string without passing it through a double. If the input character string is not valid according to JSON rules, I try to use the new jsonScrubNumber() to normalize the formatting so that JSON can accept it. git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1199 9efc2488-bf62-4759-914b-345cdb29e865 --- include/opensrf/osrf_json.h | 28 ++- include/opensrf/osrf_json_utils.h | 2 +- src/libopensrf/osrf_json_object.c | 321 +++++++++++++++++++++++++++--- src/libopensrf/osrf_json_parser.c | 35 ++-- 4 files changed, 343 insertions(+), 43 deletions(-) diff --git a/include/opensrf/osrf_json.h b/include/opensrf/osrf_json.h index 51e76cd..5312fec 100644 --- a/include/opensrf/osrf_json.h +++ b/include/opensrf/osrf_json.h @@ -94,7 +94,7 @@ struct jsonParserHandlerStruct { void (*handleNull) (void* userData); void (*handleString) (void* userData, char* string); void (*handleBool) (void* userData, int boolval); - void (*handleNumber) (void* userData, double num); + void (*handleNumber) (void* userData, const char* numstr); void (*handleError) (void* userData, char* err, ...); }; typedef struct jsonParserHandlerStruct jsonParserHandler; @@ -185,10 +185,14 @@ jsonObject* jsonNewObjectFmt(const char* data, ...); jsonObject* jsonNewObjectType(int type); /** - * Creates a new number object + * Creates a new number object from a double */ jsonObject* jsonNewNumberObject( double num ); +/** + * Creates a new number object from a numeric string + */ +jsonObject* jsonNewNumberStringObject( const char* numstr ); /** * Creates a new json bool @@ -297,6 +301,7 @@ void jsonObjectSetString(jsonObject* dest, const char* string); /* sets the number value for the object */ void jsonObjectSetNumber(jsonObject* dest, double num); +int jsonObjectSetNumberString(jsonObject* dest, const char* string); /* sets the class hint for this object */ void jsonObjectSetClass(jsonObject* dest, const char* classname ); @@ -319,7 +324,26 @@ 
jsonObject* jsonObjectClone( const jsonObject* o ); */ char* jsonObjectToSimpleString( const jsonObject* o ); +/** + Allocate a buffer and format a specified numeric value into it, + with up to 30 decimal digits of precision. Caller is responsible + for freeing the buffer. + **/ +char* doubleToString( double num ); + +/** + Return 1 if the string is numeric, otherwise return 0. + This validation follows the rules defined by the grammar at: + http://www.json.org/ + **/ +int jsonIsNumeric( const char* s ); +/** + Allocate and reformat a numeric string into one that is valid + by JSON rules. If the string is not numeric, return NULL. + Caller is responsible for freeing the buffer. + **/ +char* jsonScrubNumber( const char* s ); /* provides an XPATH style search interface (e.g. /some/node/here) and return the object at that location if one exists. Naturally, diff --git a/include/opensrf/osrf_json_utils.h b/include/opensrf/osrf_json_utils.h index 26e9e1f..3ab75a0 100644 --- a/include/opensrf/osrf_json_utils.h +++ b/include/opensrf/osrf_json_utils.h @@ -46,7 +46,7 @@ void _jsonHandleEndArray(void*); void _jsonHandleNull(void*); void _jsonHandleString(void*, char* string); void _jsonHandleBool(void*, int boolval); -void _jsonHandleNumber(void*, double num); +void _jsonHandleNumber(void*, const char* numstr); void _jsonHandleError(void*, char* str, ...); struct jsonInternalParserStruct { diff --git a/src/libopensrf/osrf_json_object.c b/src/libopensrf/osrf_json_object.c index 9dcd123..28c3fd0 100644 --- a/src/libopensrf/osrf_json_object.c +++ b/src/libopensrf/osrf_json_object.c @@ -13,6 +13,9 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. */ +#include +#include +#include #include #include #include @@ -27,7 +30,7 @@ GNU General Public License for more details. 
} else if( _obj_->type == JSON_ARRAY && newtype != JSON_ARRAY ) { \ osrfListFree(_obj_->value.l); \ _obj_->value.l = NULL; \ -} else if( _obj_->type == JSON_STRING && newtype != JSON_STRING ) { \ +} else if( _obj_->type == JSON_STRING || _obj_->type == JSON_NUMBER ) { \ free(_obj_->value.s); \ _obj_->value.s = NULL; \ } \ @@ -137,7 +140,22 @@ jsonObject* jsonNewObjectFmt(const char* data, ...) { jsonObject* jsonNewNumberObject( double num ) { jsonObject* o = jsonNewObject(NULL); o->type = JSON_NUMBER; - o->value.n = num; + o->value.s = doubleToString( num ); + return o; +} + +/** + * Creates a new number object from a numeric string + */ +jsonObject* jsonNewNumberStringObject( const char* numstr ) { + if( !numstr ) + numstr = "0"; + else if( !jsonIsNumeric( numstr ) ) + return NULL; + + jsonObject* o = jsonNewObject(NULL); + o->type = JSON_NUMBER; + o->value.s = strdup( numstr ); return o; } @@ -163,6 +181,7 @@ void jsonObjectFree( jsonObject* o ) { case JSON_HASH : osrfHashFree(o->value.h); break; case JSON_ARRAY : osrfListFree(o->value.l); break; case JSON_STRING : free(o->value.s); break; + case JSON_NUMBER : free(o->value.s); break; } // Stick the old jsonObject onto a free list @@ -261,15 +280,8 @@ static void add_json_to_buffer( const jsonObject* obj, growing_buffer * buf ) { break; case JSON_NUMBER: { - double x = obj->value.n; - if( x <= INT_MAX && x >= INT_MIN && x == (int) x ) { - INT_TO_STRING((int)x); - OSRF_BUFFER_ADD(buf, INTSTR); - - } else { - DOUBLE_TO_STRING(x); - OSRF_BUFFER_ADD(buf, DOUBLESTR); - } + if(obj->value.s) OSRF_BUFFER_ADD( buf, obj->value.s ); + else OSRF_BUFFER_ADD_CHAR( buf, '0' ); break; } @@ -389,25 +401,74 @@ unsigned long jsonObjectRemoveKey( jsonObject* dest, const char* key) { return -1; } +/** + Allocate a buffer and format a specified numeric value into it. + Caller is responsible for freeing the buffer. 
+**/ +char* doubleToString( double num ) { + + char buf[ 64 ]; + size_t len = snprintf(buf, sizeof( buf ), "%.30g", num) + 1; + if( len < sizeof( buf ) ) + return strdup( buf ); + else + { + // Need a bigger buffer (should never be necessary) + + char* bigger_buff = safe_malloc( len + 1 ); + (void) snprintf(bigger_buff, len + 1, "%.30g", num); + return bigger_buff; + } +} + char* jsonObjectGetString(const jsonObject* obj) { - return (obj && obj->type == JSON_STRING) ? obj->value.s : NULL; + if(obj) + { + if( obj->type == JSON_STRING ) + return obj->value.s; + else if( obj->type == JSON_NUMBER ) + return obj->value.s ? obj->value.s : "0"; + else + return NULL; + } + else + return NULL; } double jsonObjectGetNumber( const jsonObject* obj ) { - return (obj && obj->type == JSON_NUMBER) ? obj->value.n : 0; + return (obj && obj->type == JSON_NUMBER && obj->value.s) + ? strtod( obj->value.s, NULL ) : 0; } void jsonObjectSetString(jsonObject* dest, const char* string) { if(!(dest && string)) return; JSON_INIT_CLEAR(dest, JSON_STRING); - free(dest->value.s); dest->value.s = strdup(string); } +/** + Turn a jsonObject into a JSON_NUMBER (if it isn't already one) and store + a specified numeric string in it. If the string is not numeric, + store the equivalent of zero, and return an error status. 
+**/ +int jsonObjectSetNumberString(jsonObject* dest, const char* string) { + if(!(dest && string)) return -1; + JSON_INIT_CLEAR(dest, JSON_NUMBER); + + if( jsonIsNumeric( string ) ) { + dest->value.s = strdup(string); + return 0; + } + else { + dest->value.s = NULL; // equivalent to zero + return -1; + } +} + void jsonObjectSetNumber(jsonObject* dest, double num) { if(!dest) return; JSON_INIT_CLEAR(dest, JSON_NUMBER); - dest->value.n = num; + dest->value.s = doubleToString( num ); } void jsonObjectSetClass(jsonObject* dest, const char* classname ) { @@ -438,7 +499,8 @@ jsonObject* jsonObjectClone( const jsonObject* o ) { result = jsonNewObject(jsonObjectGetString(o)); break; case JSON_NUMBER: - result = jsonNewNumberObject(jsonObjectGetNumber(o)); + result = jsonNewObject( o->value.s ); + result->type = JSON_NUMBER; break; case JSON_BOOL: result = jsonNewBoolObject(jsonBoolIsTrue((jsonObject*) o)); @@ -479,25 +541,230 @@ char* jsonObjectToSimpleString( const jsonObject* o ) { switch( o->type ) { - case JSON_NUMBER: { + case JSON_NUMBER: + value = strdup( o->value.s ? o->value.s : "0" ); + break; + + case JSON_STRING: + value = strdup(o->value.s); + } + + return value; +} + +/** + Return 1 if the string is numeric, otherwise return 0. + This validation follows the rules defined by the grammar at: + http://www.json.org/ + **/ +int jsonIsNumeric( const char* s ) { + + if( !s || !*s ) return 0; + + const char* p = s; + + // skip leading minus sign, if present (leading plus sign not allowed) + + if( '-' == *p ) + ++p; + + // There must be at least one digit to the left of the decimal + + if( isdigit( (unsigned char) *p ) ) { + if( '0' == *p++ ) { + + // If the first digit is zero, it must be the + // only digit to the left of the decimal + + if( isdigit( (unsigned char) *p ) ) + return 0; + } + else { + + // Skip over the following digits + + while( isdigit( (unsigned char) *p ) ) ++p; + } + } + else + return 0; + + if( !*p ) + return 1; // integer + + if( '.' 
== *p ) { + + ++p; + + // If there is a decimal point, there must be + // at least one digit to the right of it + + if( isdigit( (unsigned char) *p ) ) + ++p; + else + return 0; + + // skip over contiguous digits + + while( isdigit( (unsigned char) *p ) ) ++p; + } + + if( ! *p ) + return 1; // decimal fraction, no exponent + else if( *p != 'e' && *p != 'E' ) + return 0; // extra junk, no exponent + else + ++p; + + // If we get this far, we have the beginnings of an exponent. + // Skip over optional sign of exponent. + + if( '-' == *p || '+' == *p ) + ++p; + + // There must be at least one digit in the exponent + + if( isdigit( (unsigned char) *p ) ) + ++p; + else + return 0; + + // skip over contiguous digits + + while( isdigit( (unsigned char) *p ) ) ++p; + + if( *p ) + return 0; // extra junk + else + return 1; // number with exponent +} + +/** + Allocate and reformat a numeric string into one that is valid + by JSON rules. If the string is not numeric, return NULL. + Caller is responsible for freeing the buffer. + **/ +char* jsonScrubNumber( const char* s ) { + if( !s || !*s ) return NULL; + + growing_buffer* buf = buffer_init( 64 ); + + // Skip leading white space, if present + + while( isspace( (unsigned char) *s ) ) ++s; + + // Skip leading plus sign, if present, but keep a minus + + if( '-' == *s ) + { + buffer_add_char( buf, '-' ); + ++s; + } + else if( '+' == *s ) + ++s; + + if( '\0' == *s ) { + // No digits found + + buffer_free( buf ); + return NULL; + } + // Skip any leading zeros + + while( '0' == *s ) ++s; + + // Capture digits to the left of the decimal, + // and note whether there are any. 
+ + int left_digit = 0; // boolean + + if( isdigit( (unsigned char) *s ) ) { + buffer_add_char( buf, *s++ ); + left_digit = 1; + } + + while( isdigit( (unsigned char) *s ) ) + buffer_add_char( buf, *s++ ); - if( o->value.n == (int) o->value.n ) { - INT_TO_STRING((int) o->value.n); - value = strdup(INTSTR); + // Now we expect to see a decimal point, + // an exponent, or end-of-string. - } else { - DOUBLE_TO_STRING(o->value.n); - value = strdup(DOUBLESTR); + switch( *s ) + { + case '\0' : + break; + case '.' : + { + // Add a single leading zero, if we need to + + if( ! left_digit ) + buffer_add_char( buf, '0' ); + buffer_add_char( buf, '.' ); + ++s; + + if( ! left_digit && ! isdigit( (unsigned char) *s ) ) + { + // No digits on either side of decimal + + buffer_free( buf ); + return NULL; } + // Collect digits to right of decimal + + while( isdigit( (unsigned char) *s ) ) + buffer_add_char( buf, *s++ ); + break; } + case 'e' : + case 'E' : - case JSON_STRING: - value = strdup(o->value.s); + // Exponent; we'll deal with it later, but + // meanwhile make sure we have something + // to its left + + if( ! left_digit ) + buffer_add_char( buf, '1' ); + break; + default : + + // Unexpected character; bail out + + buffer_free( buf ); + return NULL; } - return value; -} + if( '\0' == *s ) // Are we done yet? + return buffer_release( buf ); + if( 'e' != *s && 'E' != *s ) { + // Unexpected character: bail out + + buffer_free( buf ); + return NULL; + } + + // We have an exponent. Load the e or E, + // and the sign if there is one. 
+ + buffer_add_char( buf, *s++ ); + + if( '+' == *s || '-' == *s ) + buffer_add_char( buf, *s++ ); + + // Collect digits of the exponent + + while( isdigit( (unsigned char) *s ) ) + buffer_add_char( buf, *s++ ); + + // There better not be anything left + + if( *s ) { + buffer_free( buf ); + return NULL; + } + + return buffer_release( buf ); +} diff --git a/src/libopensrf/osrf_json_parser.c b/src/libopensrf/osrf_json_parser.c index 2d8bc8b..8cb84af 100644 --- a/src/libopensrf/osrf_json_parser.c +++ b/src/libopensrf/osrf_json_parser.c @@ -349,22 +349,30 @@ int _jsonParserHandleNumber( jsonParserContext* ctx ) { return 1; } - /* make me more strict */ - char* err = NULL; - double d = strtod(ctx->buffer->buf, &err); - if(err && err[0] != '\0') - return _jsonParserError(ctx, "Invalid number sequence"); - JSON_STATE_REMOVE(ctx, JSON_STATE_IN_NUMBER); - OSRF_BUFFER_RESET(ctx->buffer); if(ctx->handler->handleNumber) - ctx->handler->handleNumber( ctx->userData, d ); + { + if( jsonIsNumeric( ctx->buffer->buf ) ) + ctx->handler->handleNumber( ctx->userData, ctx->buffer->buf ); + else { + // The number string is not numeric according to JSON rules. + // Scrub it into an acceptable format. 
+ + char* scrubbed = jsonScrubNumber( ctx->buffer->buf ); + if( !scrubbed ) + return _jsonParserError(ctx, "Invalid number sequence"); + else { + ctx->handler->handleNumber( ctx->userData, scrubbed ); + free( scrubbed ); + } + } + } + ctx->index--; /* scooch back to the first non-digit number */ + JSON_STATE_REMOVE(ctx, JSON_STATE_IN_NUMBER); + OSRF_BUFFER_RESET(ctx->buffer); return 0; } - - - int jsonParseChunk( jsonParserContext* ctx, const char* data, int datalen, int flags ) { if( !( ctx && ctx->handler && data && datalen > 0 )) return -1; @@ -638,9 +646,10 @@ void _jsonHandleBool(void* ctx, int boolval) { _jsonInsertParserItem(p, obj); } -void _jsonHandleNumber(void* ctx, double num) { +void _jsonHandleNumber(void* ctx, const char* numstr) { + jsonObject* obj = jsonNewNumberStringObject(numstr); jsonInternalParser* p = (jsonInternalParser*) ctx; - _jsonInsertParserItem(p, jsonNewNumberObject(num)); + _jsonInsertParserItem(p, obj); } void _jsonHandleError(void* ctx, char* str, ...) { -- 2.43.2