From cb4587be75bf9294d65c7173ff41c275a14e0f04 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Fri, 18 Aug 2017 11:43:31 -0400 Subject: [PATCH 1/1] LP#1709710: Make chunk sizing smart about XML quoting XML inside JSON as a quoted string that's itself inside XML causes quite the pile up of nested escaping of certain characters in OpenSRF PARTIAL_RESPONSE messages. Here we check for the worst offenders (<, >, &, and ") and account for the cost of escaping them in chunked response stanzas. Signed-off-by: Mike Rylander Signed-off-by: Galen Charlton Signed-off-by: Bill Erickson Signed-off-by: Jason Stephenson Signed-off-by: Galen Charlton --- include/opensrf/utils.h | 6 ++++++ src/libopensrf/osrf_app_session.c | 15 +++++++++++---- src/libopensrf/osrf_application.c | 11 ++++++++--- src/libopensrf/utils.c | 23 +++++++++++++++++++++++ src/perl/lib/OpenSRF/AppSession.pm | 24 +++++++++++++++++++++--- 5 files changed, 69 insertions(+), 10 deletions(-) diff --git a/include/opensrf/utils.h b/include/opensrf/utils.h index 2276dd6..34e0ba6 100644 --- a/include/opensrf/utils.h +++ b/include/opensrf/utils.h @@ -377,6 +377,12 @@ char* md5sum( const char* text, ... ); */ int osrfUtilsCheckFileDescriptor( int fd ); +/* + Returns the approximate additional length of + a string after XML escaping <, >, &, and ". 
+*/ +size_t osrfXmlEscapingLength ( const char* str ); + #ifdef __cplusplus } #endif diff --git a/src/libopensrf/osrf_app_session.c b/src/libopensrf/osrf_app_session.c index 5633e1b..28242e7 100644 --- a/src/libopensrf/osrf_app_session.c +++ b/src/libopensrf/osrf_app_session.c @@ -1363,13 +1363,20 @@ int osrfAppRequestRespondComplete( OSRF_STATUS_COMPLETE ); if (data) { - char* json = jsonObjectToJSON(data); - size_t data_size = strlen(json); + size_t raw_size = strlen(json); + size_t extra_size = osrfXmlEscapingLength(json); + size_t data_size = raw_size + extra_size; size_t chunk_size = OSRF_MSG_CHUNK_SIZE; - if (chunk_size > 0 && chunk_size < data_size) { - osrfSendChunkedResult(ses, requestId, json, data_size, chunk_size); + if (data_size > chunk_size) // calculate an escape-scaled chunk size + chunk_size = ((double)raw_size / (double)data_size) * (double)chunk_size; + + if (chunk_size > 0 && chunk_size < raw_size) { + // chunking -- response message exceeds max message size. + // break it up into chunks for partial delivery + + osrfSendChunkedResult(ses, requestId, json, raw_size, chunk_size); osrfAppSessionSendBatch( ses, &status, 1 ); } else { diff --git a/src/libopensrf/osrf_application.c b/src/libopensrf/osrf_application.c index ca6c219..4a5f53e 100644 --- a/src/libopensrf/osrf_application.c +++ b/src/libopensrf/osrf_application.c @@ -734,15 +734,20 @@ static int _osrfAppRespond( osrfMethodContext* ctx, const jsonObject* data, int if( data ) { char* data_str = jsonObjectToJSON(data); // free me (below) - size_t data_size = strlen(data_str); + size_t raw_size = strlen(data_str); + size_t extra_size = osrfXmlEscapingLength(data_str); + size_t data_size = raw_size + extra_size; size_t chunk_size = ctx->method->max_chunk_size; - if (chunk_size > 0 && chunk_size < data_size) { + if (data_size > chunk_size) // calculate an escape-scaled chunk size + chunk_size = ((double)raw_size / (double)data_size) * (double)chunk_size; + + if (chunk_size > 0 && chunk_size < 
raw_size) { // chunking -- response message exceeds max message size. // break it up into chunks for partial delivery osrfSendChunkedResult(ctx->session, ctx->request, - data_str, data_size, chunk_size); + data_str, raw_size, chunk_size); } else { diff --git a/src/libopensrf/utils.c b/src/libopensrf/utils.c index 6628c8c..1c049c0 100644 --- a/src/libopensrf/utils.c +++ b/src/libopensrf/utils.c @@ -781,3 +781,26 @@ int osrfUtilsCheckFileDescriptor( int fd ) { return 0; } +size_t osrfXmlEscapingLength ( const char* str ) { + int extra = 0; + const char* s; + for (s = str; *s; ++s) { + switch (*s) { + case '>': + case '<': + extra += 3; + break; + case '&': + extra += 4; + break; + case '"': + extra += 11; + break; + default: + break; + } + } + + return extra; +} + diff --git a/src/perl/lib/OpenSRF/AppSession.pm b/src/perl/lib/OpenSRF/AppSession.pm index 36d56b0..bb99787 100644 --- a/src/perl/lib/OpenSRF/AppSession.pm +++ b/src/perl/lib/OpenSRF/AppSession.pm @@ -1051,10 +1051,28 @@ sub respond { if ($self->max_chunk_size > 0) { # we might need to chunk my $str = OpenSRF::Utils::JSON->perl2JSON($msg); - if (length($str) > $self->max_chunk_size) { # send partials ("chunking") - for (my $i = 0; $i < length($str); $i += $self->max_chunk_size) { + + # XML can add a lot of length to a chunk due to escaping, so we + # calculate chunk size based on an XML-escaped version of the message. + # Example: If escaping doubles the length of the string then $ratio + # will be 0.5 and we'll cut the chunk size for this message in half. 
+ + my $raw_length = length($str); + my $escaped_length = $raw_length; + $escaped_length += 11 * (() = ( $str =~ /"/g)); # 7 \s and " + $escaped_length += 4 * (() = ( $str =~ /&/g)); # & + $escaped_length += 3 * (() = ( $str =~ /[<>]/g)); # < / > + + my $chunk_size = $self->max_chunk_size; + + if ($escaped_length > $self->max_chunk_size) { + $chunk_size = ($raw_length / $escaped_length) * $self->max_chunk_size; + } + + if ($raw_length > $chunk_size) { # send partials ("chunking") + for (my $i = 0; $i < length($str); $i += $chunk_size) { $response = new OpenSRF::DomainObject::oilsResult::Partial; - $response->content( substr($str, $i, $self->max_chunk_size) ); + $response->content( substr($str, $i, $chunk_size) ); $self->session->send($type, $response, $self->threadTrace); } # This triggers reconstruction on the remote end -- 2.43.2