4 #include "http_config.h"
6 #include "http_protocol.h"
7 #include "http_request.h"
8 //#include "apr_compat.h"
9 #include "apr_strings.h"
10 #include "apr_reslist.h"
12 #include "util_filter.h"
13 #include "opensrf/string_array.h"
14 #include "opensrf/utils.h"
15 #include "opensrf/log.h"
17 #include <sys/types.h>
21 #define APACHE_TOOLS_MAX_POST_SIZE 10485760 /* 10 MB */
22 #define MODULE_NAME "idlchunk_module"
24 /* Define the config defaults here */
25 #define MODIDLCHUNK_CONFIG_STRIP_COMMENTS "IDLChunkStripComments"
26 #define MODIDLCHUNK_CONFIG_CONTENT_TYPE "IDLChunkContentType"
27 #define MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT "text/html"
28 #define MODIDLCHUNK_CONFIG_STRIP_PI "IDLChunkStripPI"
29 #define MODIDLCHUNK_CONFIG_DOCTYPE "IDLChunkDoctype"
30 #define MODIDLCHUNK_CONFIG_STRIP_DOCTYPE "IDLChunkStripDoctype"
31 #define MODIDLCHUNK_CONFIG_ESCAPE_SCRIPT "IDLChunkEscapeScript"
/* Forward declaration of the module record; it is defined with its
 * initializer further down in this source. */
33 module AP_MODULE_DECLARE_DATA idlchunk_module;
35 int idlChunkInScript = 0; /* are we in the middle of a <script> tag */
/* Values of the class request parameter. Assigned in idlChunkHandler and
 * consulted by startElement whenever it encounters a class element. */
36 osrfStringArray* mparams = NULL;
/* Fields of the per-filter context (accessed as ctx->brigade and ctx->parser
 * in idlChunkHandler). The enclosing struct header is not visible in this
 * listing. */
43 apr_bucket_brigade* brigade; /* the bucket brigade we buffer our data into */
44 XML_Parser parser; /* our XML parser */
/* Fields of the per-directory configuration (accessed through config-> by the
 * directive setters and the XML callbacks). The enclosing struct header, and
 * the stripDoctype member that the setters assign, are not visible in this
 * listing. */
49 int stripComments; /* should we strip comments on the way out? */
50 int stripPI; /* should we strip processing instructions on the way out? */
52 int escapeScript; /* if true, we html-escape any text inside a <script> tag */
53 char* contentType; /* the content type used to serve pages */
54 char* doctype; /* the doctype header to send before any other data */
/*
 * Collects the request query string (and, for POST requests, the request
 * body) into a growing buffer, then splits that text on '&' and '=' into
 * URL-unescaped name/value pairs. Names go into even-numbered slots of a new
 * osrfStringArray and each value into the following odd-numbered slot.
 *
 * r: the current request; NULL yields NULL.
 *
 * NOTE(review): this listing omits several lines of the function (local
 * declarations, else branches, closing braces, return statements), so the
 * buffer cleanup and the exact return paths cannot be confirmed from here.
 */
58 static osrfStringArray* apacheParseParms(request_rec* r) {
60 if( r == NULL ) return NULL;
61 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "got a valid request_rec");
64 apr_pool_t *p = r->pool; /* memory pool */
65 growing_buffer* buffer = buffer_init(1025);
67 /* gather the post args and append them to the url query string */
68 if( !strcmp(r->method,"POST") ) {
/* NOTE(review): the status returned by ap_setup_client_block() is ignored */
70 ap_setup_client_block(r, REQUEST_CHUNKED_DECHUNK);
72 //osrfLogDebug(OSRF_LOG_MARK, "gateway reading post data..");
73 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "idlchunk reading post data..");
75 if(ap_should_client_block(r)) {
78 /* Start with url query string, if any */
80 if(r->args && r->args[0])
81 buffer_add(buffer, r->args);
85 //osrfLogDebug(OSRF_LOG_MARK, "gateway client has post data, reading...");
87 /* Append POST data */
/* Reads at most sizeof(body) - 1 bytes per pass; presumably the spare byte
 * holds a terminator written on a line not shown here - TODO confirm, since
 * body is handed to buffer_add() as a string below. */
90 while( (bread = ap_get_client_block(r, body, sizeof(body) - 1)) ) {
93 //osrfLogInfo(OSRF_LOG_MARK,
94 // "ap_get_client_block(): returned error, exiting POST reader");
99 buffer_add( buffer, body );
101 //osrfLogDebug(OSRF_LOG_MARK,
102 // "gateway read %ld bytes: %d bytes of data so far", bread, buffer->n_used);
/* Size cap on the accumulated request data (APACHE_TOOLS_MAX_POST_SIZE);
 * what happens when it is exceeded is not visible in this listing. */
104 if(buffer->n_used > APACHE_TOOLS_MAX_POST_SIZE) {
105 //osrfLogError(OSRF_LOG_MARK, "gateway received POST larger "
106 // "than %d bytes. dropping request", APACHE_TOOLS_MAX_POST_SIZE);
112 //osrfLogDebug(OSRF_LOG_MARK, "gateway done reading post data");
/* Presumably the non-POST branch (the else is not visible): use the query
 * string alone. */
117 if(r->args && r->args[0])
118 buffer_add(buffer, r->args);
119 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "idlchunk read GET data..");
/* Copy the accumulated text into the request pool so ap_getword() can
 * consume it below. */
123 if(buffer->n_used > 0)
124 arg = apr_pstrdup(p, buffer->buf);
129 if( !arg || !arg[0] ) { /* we received no request */
133 //osrfLogDebug(OSRF_LOG_MARK, "parsing URL params from post/get request data: %s", arg);
134 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "parsing URL params from post/get request data: %s", arg);
136 osrfStringArray* sarray = osrfNewStringArray(12); /* method parameters */
138 char* key = NULL; /* query item name */
139 char* val = NULL; /* query item value */
141 /* Parse the post/get request data into a series of name/value pairs. */
142 /* Load each name into an even-numbered slot of an osrfStringArray, and */
143 /* the corresponding value into the following odd-numbered slot. */
/* Each pass takes one '&'-delimited item into val, then splits the name off
 * the front of val at '='; both are pool-allocated by ap_getword(). */
145 while( arg && (val = ap_getword(p, (const char**) &arg, '&'))) {
147 key = ap_getword(r->pool, (const char**) &val, '=');
151 ap_unescape_url(key);
152 ap_unescape_url(val);
154 //osrfLogDebug(OSRF_LOG_MARK, "parsed URL params %s=%s", key, val);
156 osrfStringArrayAdd(sarray, key);
157 osrfStringArrayAdd(sarray, val);
/* Runaway guard: once the counter passes 1000 the partial result is freed.
 * The statement that follows the free is not visible in this listing. */
159 if( sanity++ > 1000 ) {
160 //osrfLogError(OSRF_LOG_MARK,
161 // "Parsing URL params failed sanity check: 1000 iterations");
162 osrfStringArrayFree(sarray);
168 //osrfLogDebug(OSRF_LOG_MARK,
169 // "Apache tools parsed %d params key/values", sarray->size / 2 );
/*
 * Builds a new string array from the strings stored at the even-numbered
 * slots of params, i.e. the names in the name/value layout described in
 * apacheParseParms. A NULL params yields NULL.
 * The return statement is not visible in this listing; presumably the new
 * array is returned and the caller frees it - TODO confirm.
 */
176 static osrfStringArray* apacheGetParamKeys(osrfStringArray* params) {
177 if(params == NULL) return NULL;
178 osrfStringArray* sarray = osrfNewStringArray(12);
180 //osrfLogDebug(OSRF_LOG_MARK, "Fetching URL param keys");
/* i is incremented both in the call and in the loop header, so the loop
 * advances two slots per pass and only visits slots 0, 2, 4, ... */
181 for( i = 0; i < params->size; i++ )
182 osrfStringArrayAdd(sarray, osrfStringArrayGetString(params, i++));
/*
 * Builds a new string array holding every value in params whose name equals
 * key (exact strcmp match). params uses the name/value slot layout described
 * in apacheParseParms. NULL params or NULL key yields NULL.
 * The return statement is not visible in this listing; presumably the new
 * array is returned and the caller frees it - TODO confirm.
 */
186 static osrfStringArray* apacheGetParamValues(osrfStringArray* params, char* key) {
188 if(params == NULL || key == NULL) return NULL;
189 osrfStringArray* sarray = osrfNewStringArray(12);
191 //osrfLogDebug(OSRF_LOG_MARK, "Fetching URL values for key %s", key);
/* The i++ in the lookup moves i from the name slot to its value slot; the
 * loop header then moves it on to the next name. */
193 for( i = 0; i < params->size; i++ ) {
194 const char* nkey = osrfStringArrayGetString(params, i++);
195 if(nkey && !strcmp(nkey, key))
196 osrfStringArrayAdd(sarray, osrfStringArrayGetString(params, i));
/*
 * Returns a strdup() copy of the value paired with the first name in params
 * that equals key; the caller owns the copy and must free() it.
 * NULL params or NULL key yields NULL. The no-match return is not visible in
 * this listing.
 * NOTE(review): the looked-up value is passed to strdup() without a NULL
 * check; confirm osrfStringArrayGetString() cannot return NULL for the value
 * slot (for example when the array has an odd number of entries).
 */
202 static char* apacheGetFirstParamValue(osrfStringArray* params, char* key) {
203 if(params == NULL || key == NULL) return NULL;
206 //osrfLogDebug(OSRF_LOG_MARK, "Fetching first URL value for key %s", key);
207 for( i = 0; i < params->size; i++ ) {
208 const char* nkey = osrfStringArrayGetString(params, i++);
209 if(nkey && !strcmp(nkey, key))
210 return strdup(osrfStringArrayGetString(params, i));
/*
 * printf-style helper: expands msg and its variadic arguments through the
 * VA_LIST_TO_STRING macro (which presumably leaves the text in VA_BUF) and
 * prints the result plus a newline to stderr.
 * The return statement is not visible in this listing.
 */
217 static int apacheDebug( char* msg, ... ) {
218 VA_LIST_TO_STRING(msg);
219 fprintf(stderr, "%s\n", VA_BUF);
/*
 * printf-style helper: expands msg and its variadic arguments through the
 * VA_LIST_TO_STRING macro (which presumably leaves the text in VA_BUF),
 * prints the result plus a newline to stderr, and returns
 * HTTP_INTERNAL_SERVER_ERROR so a caller can report and fail in one step.
 */
225 static int apacheError( char* msg, ... ) {
226 VA_LIST_TO_STRING(msg);
227 fprintf(stderr, "%s\n", VA_BUF);
229 return HTTP_INTERNAL_SERVER_ERROR;
235 /* get the content type from the config */
/* Setter registered for the IDLChunkContentType directive in idlChunkCommands.
 * cfg is the per-directory idlChunkConfig; the arg pointer itself is stored,
 * no copy is made. The return statement is not visible in this listing. */
236 static const char* idlChunkSetContentType(cmd_parms *params, void *cfg, const char *arg) {
237 idlChunkConfig* config = (idlChunkConfig*) cfg;
238 config->contentType = (char*) arg;
243 /* get the strip PI flag from the config */
/* Setter registered for the IDLChunkStripPI directive in idlChunkCommands.
 * stripPI becomes 1 only when arg equals yes (case-insensitive); any other
 * value, including NULL, gives 0. The return statement is not visible in
 * this listing. */
244 static const char* idlChunkSetStripPI(cmd_parms *params, void *cfg, const char *arg) {
245 idlChunkConfig* config = (idlChunkConfig*) cfg;
246 char* a = (char*) arg;
247 config->stripPI = (a && !strcasecmp(a, "yes")) ? 1 : 0;
251 /* Get the strip comments flag from the config */
/* Setter registered for the IDLChunkStripComments directive in
 * idlChunkCommands. stripComments becomes 1 only when arg equals yes
 * (case-insensitive); any other value, including NULL, gives 0.
 * The return statement is not visible in this listing. */
252 static const char* idlChunkSetStripComments(cmd_parms *params, void *cfg, const char *arg) {
253 idlChunkConfig* config = (idlChunkConfig*) cfg;
254 char* a = (char*) arg;
255 config->stripComments = (a && !strcasecmp(a, "yes")) ? 1 : 0;
/* Setter registered for the IDLChunkEscapeScript directive in
 * idlChunkCommands. escapeScript becomes 1 only when arg equals yes
 * (case-insensitive); any other value, including NULL, gives 0.
 * The return statement is not visible in this listing. */
259 static const char* idlChunkSetEscapeScript(cmd_parms *params, void *cfg, const char *arg) {
260 idlChunkConfig* config = (idlChunkConfig*) cfg;
261 char* a = (char*) arg;
262 config->escapeScript = (a && !strcasecmp(a, "yes")) ? 1 : 0;
/* Setter registered for the IDLChunkStripDoctype directive in
 * idlChunkCommands. stripDoctype becomes 1 only when arg equals yes
 * (case-insensitive); any other value, including NULL, gives 0.
 * The return statement is not visible in this listing. */
266 static const char* idlChunkSetStripDoctype(cmd_parms *params, void *cfg, const char *arg) {
267 idlChunkConfig* config = (idlChunkConfig*) cfg;
268 char* a = (char*) arg;
269 config->stripDoctype = (a && !strcasecmp(a, "yes")) ? 1 : 0;
274 /* Get the user defined doctype from the config */
/* Setter registered for the IDLChunkDoctype directive in idlChunkCommands.
 * The arg pointer itself is stored, no copy is made; idlChunkHandler writes
 * this string out when it is non-NULL. The return statement is not visible
 * in this listing. */
275 static const char* idlChunkSetDoctype(cmd_parms *params, void *cfg, const char *arg) {
276 idlChunkConfig* config = (idlChunkConfig*) cfg;
277 config->doctype = (char*) arg;
281 /* Tell apache how to set our config variables */
/* Directive table: each entry pairs a directive name (one of the
 * MODIDLCHUNK_CONFIG_* macros) with its setter function. Every directive
 * takes exactly one argument (AP_INIT_TAKE1) and is registered with
 * ACCESS_CONF. The table terminator and closing brace are not visible in
 * this listing. */
282 static const command_rec idlChunkCommands[] = {
283 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_STRIP_COMMENTS,
284 idlChunkSetStripComments, NULL, ACCESS_CONF, "IDLCHUNK Strip Comments"),
285 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_CONTENT_TYPE,
286 idlChunkSetContentType, NULL, ACCESS_CONF, "IDLCHUNK Content Type"),
287 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_STRIP_PI,
288 idlChunkSetStripPI, NULL, ACCESS_CONF, "IDLCHUNK Strip XML Processing Instructions"),
289 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_DOCTYPE,
290 idlChunkSetDoctype, NULL, ACCESS_CONF, "IDLCHUNK Doctype Declaration"),
291 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_STRIP_DOCTYPE,
292 idlChunkSetStripDoctype, NULL, ACCESS_CONF, "IDLCHUNK Strip Doctype Declaration"),
293 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_ESCAPE_SCRIPT,
294 idlChunkSetEscapeScript, NULL, ACCESS_CONF, "IDLCHUNK Escape data in script tags"),
298 /* Creates a new config object */
/* Allocates an idlChunkConfig from pool p and fills in the defaults visible
 * below: comments kept, doctype stripped, script text escaped, content type
 * MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT, no custom doctype. The dir
 * argument is not used in the visible lines.
 * NOTE(review): the memory comes from apr_palloc(), which does not zero it,
 * and no assignment to stripPI appears in this listing - confirm that it is
 * initialised on a line not shown. */
299 static void* idlChunkCreateDirConfig( apr_pool_t* p, char* dir ) {
300 idlChunkConfig* config =
301 (idlChunkConfig*) apr_palloc( p, sizeof(idlChunkConfig) );
303 config->stripComments = 0;
305 config->stripDoctype = 1;
306 config->escapeScript = 1;
307 config->contentType = MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT;
308 config->doctype = NULL;
310 return (void*) config;
313 /* keep for a while in case we ever need it */
/* IDLCHUNK_INHERIT picks the child value of field f when it is non-zero and
 * the parent value otherwise.
 * NOTE(review): the macro body ends in a semicolon, so each use below
 * expands to a doubled semicolon. */
315 #define IDLCHUNK_INHERIT(p, c, f) ((c->f) ? c->f : p->f);
/* Directory-config merger: builds a zeroed idlChunkConfig in pool p and, in
 * the lines visible here, inherits contentType and stripComments. The rest of
 * the function is not visible in this listing.
 * NOTE(review): the module record passes NULL for the dir-merger slot, so
 * this function does not appear to be registered. */
316 static void* idlChunkMergeDirConfig(apr_pool_t *p, void *base, void *overrides) {
317 idlChunkConfig* parent = base;
318 idlChunkConfig* child = overrides;
319 idlChunkConfig* newConf = (idlChunkConfig*) apr_pcalloc(p, sizeof(idlChunkConfig));
320 newConf->contentType = IDLCHUNK_INHERIT(parent, child, contentType);
321 newConf->stripComments = IDLCHUNK_INHERIT(parent, child, stripComments);
327 /* We need a global parser object because sub-requests, with different
328 * filter contexts, are parsing part of the same document.
329 * This means that this filter will only work in forked (non-threaded) environments.
330 * XXX Figure out how to share pointers/data across filters */
/* Created in idlChunkHandler when it is NULL, and freed there on an EOS or
 * FLUSH bucket and on an XML parse error. */
331 XML_Parser parser = NULL;
333 /* utility function which passes data to the next filter */
/* data is used as a printf-style format: it and the variadic arguments are
 * expanded through VA_LIST_TO_STRING (which presumably leaves the text in
 * VA_BUF), and the result is written with ap_fwrite() to filter->next using
 * the brigade held in the filter context. Does nothing when filter or data
 * is NULL.
 * NOTE(review): filter->ctx is dereferenced without a NULL check and the
 * status returned by ap_fwrite() is discarded. */
334 static void _fwrite( ap_filter_t* filter, char* data, ... ) {
335 if(!(filter && data)) return;
336 idlChunkContext* ctx = (idlChunkContext*) filter->ctx;
337 VA_LIST_TO_STRING(data);
338 ap_fwrite( filter->next, ctx->brigade, VA_BUF, strlen(VA_BUF));
342 /** XXX move me to opensrf/utils.h */
/* Only part of the macro body is visible in this listing. Judging by its name
 * and the visible loop over str up to the terminating NUL, it presumably
 * replaces each occurrence of character o in str with n, in place - TODO
 * confirm against the full definition. Used by printAttr. */
343 #define OSRF_UTILS_REPLACE_CHAR(str, o, n)\
346 while(str[i] != '\0') {\
353 /* cycles through the attributes attached to an element */
/* Returns the value of the attribute named id from atts, the NULL-terminated
 * name/value array handed to startElement. The returned pointer aliases the
 * array entry; nothing is copied. A NULL atts yields NULL; the no-match
 * return is not visible in this listing.
 * NOTE(review): the loop header advances i by one, while entries come in
 * name/value pairs; confirm that a second increment exists on a line not
 * shown, otherwise attribute values are also compared against id as if they
 * were names. */
354 static char* find_id_attr( const char** atts ) {
355 if(!atts) return NULL;
357 for( i = 0; atts[i] && atts[i+1]; i++ ) {
358 const char* name = atts[i];
359 char* value = (char*)atts[i+1];
360 if (!strcmp(name,"id")) return value;
365 /* cycles through the attributes attached to an element */
/* Writes each attribute of atts to the output as name=value text via
 * _fwrite, after HTML-escaping the value into the request pool. The quoting
 * character is chosen per attribute, see below. The early lines of the
 * function (including any NULL check on atts), the else keyword and the
 * closing lines are not visible in this listing. */
366 static void printAttr( ap_filter_t* filter, const char** atts ) {
369 for( i = 0; atts[i] && atts[i+1]; i++ ) {
370 const char* name = atts[i];
371 const char* value = atts[i+1];
372 char* escaped = ap_escape_html(filter->r->pool, value);
374 /* we make a big assumption here that if the string contains a ',
375 * then the original attribute was wrapped in "s - so recreate that */
/* Value contains a single quote: wrap it in double quotes, after converting
 * any double quotes inside it to single quotes. */
376 if( strchr( escaped, '\'' ) ) {
377 OSRF_UTILS_REPLACE_CHAR(escaped,'"','\'');
378 _fwrite( filter, " %s=\"%s\"", name, escaped );
/* Otherwise (presumably the else branch) wrap the value in single quotes,
 * with the mirror-image replacement applied first. */
381 OSRF_UTILS_REPLACE_CHAR(escaped,'\'','"');
382 _fwrite( filter, " %s='%s'", name, escaped );
389 /* Starts an XML element */
/* Expat start-tag callback; userData is the ap_filter_t installed with
 * XML_SetUserData in idlChunkHandler. Re-emits the start tag and its
 * attributes when output is enabled: all is set, we are inside a selected
 * class element, or the element is the IDL element itself.
 * Several lines (else branches, closing braces, and whatever happens when a
 * requested class is found) are not visible in this listing.
 * NOTE(review): name is passed to strcmp() before the later name && check,
 * so that check cannot protect against a NULL name. */
390 static void XMLCALL startElement(void *userData, const char *name, const char **atts) {
392 ap_filter_t* filter = (ap_filter_t*) userData;
395 if (!strcmp(name,"class")) {
396 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, filter->r,"Looking at %s with id of %s",name, find_id_attr(atts));
/* Is this class id one of the requested ones? Presumably inChunk is set on a
 * line not shown here - TODO confirm. */
398 if (osrfStringArrayContains(mparams, find_id_attr(atts))) {
400 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, filter->r,"Found desired class %s", find_id_attr(atts));
404 if (all || inChunk || (name && (!strcmp(name,"IDL")))) {
406 idlChunkConfig* config = ap_get_module_config(
407 filter->r->per_dir_config, &idlchunk_module );
408 _fwrite(filter, "<%s", name );
409 printAttr( filter, atts );
/* The first 9 characters of the configured content type are compared with
 * the default (text/html). On a match the tag is closed immediately; the
 * other write below is presumably the else branch. The extra name argument
 * is unused by both format strings. */
410 if (!strncmp(config->contentType, MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT, 9)) {
411 _fwrite(filter, " />", name );
413 _fwrite(filter, ">", name );
/* Remember that we are inside a script element; charHandler consults this. */
415 if(!strcmp(name, "script"))
416 idlChunkInScript = 1;
420 /* Handles the character data */
/* Expat character-data callback. s is a run of len characters that is not
 * NUL-terminated, so it is first copied into the zero-filled local buffer
 * data (declared on a line not visible here; presumably at least len + 1
 * bytes - TODO confirm). When output is enabled (all or inChunk) the text is
 * written out: raw if we are inside a script element and escapeScript is off,
 * HTML-escaped otherwise (the else keyword is not visible in this listing). */
421 static void XMLCALL charHandler( void* userData, const XML_Char* s, int len ) {
422 ap_filter_t* filter = (ap_filter_t*) userData;
424 memset( data, '\0', sizeof(data) );
425 memcpy( data, s, len );
427 idlChunkConfig* config = ap_get_module_config(
428 filter->r->per_dir_config, &idlchunk_module );
430 if (all || inChunk) {
431 if( idlChunkInScript && ! config->escapeScript ) {
432 _fwrite( filter, "%s", data );
435 char* escaped = ap_escape_html(filter->r->pool, data);
436 _fwrite( filter, "%s", escaped );
/* Expat processing-instruction callback: re-emits the instruction as
 * target followed by data between the PI delimiters. Unlike startElement and
 * charHandler, no all / inChunk check is made here, so instructions are
 * written wherever they occur in the document. */
441 static void XMLCALL handlePI( void* userData, const XML_Char* target, const XML_Char* data) {
442 ap_filter_t* filter = (ap_filter_t*) userData;
443 _fwrite(filter, "<?%s %s?>", target, data);
/* Expat comment callback: re-emits the comment text wrapped in XML comment
 * delimiters, adding one space on each side. Unlike startElement and
 * charHandler, no all / inChunk check is made here. */
446 static void XMLCALL handleComment( void* userData, const XML_Char* comment ) {
447 ap_filter_t* filter = (ap_filter_t*) userData;
448 _fwrite(filter, "<!-- %s -->", comment);
451 /* Ends an XML element */
/* Expat end-tag callback; userData is the ap_filter_t installed with
 * XML_SetUserData. When output is enabled (all is set, we are inside a
 * selected class element, or the element is the IDL element itself) the
 * closing tag is written, subject to the content-type test below. Leaving a
 * class element always clears inChunk.
 * The body of the content-type branch and the closing braces are not visible
 * in this listing. */
452 static void XMLCALL endElement(void *userData, const char *name) {
454 if (all || inChunk || (name && (!strcmp(name,"IDL")))) {
456 ap_filter_t* filter = (ap_filter_t*) userData;
457 idlChunkConfig* config = ap_get_module_config(
458 filter->r->per_dir_config, &idlchunk_module );
/* Same 9-character comparison against the default content type (text/html)
 * that startElement uses when it decides how to close the start tag. */
459 if (!strncmp(config->contentType, MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT, 9)) {
462 _fwrite( filter, "</%s>", name );
/* Leaving a script element: clear the flag that startElement set. It was
 * previously set to 1 again here, so it never dropped back to 0 and
 * charHandler kept treating all later text as script content. */
463 if(!strcmp(name, "script"))
464 idlChunkInScript = 0;
467 if (!strcmp(name,"class")) inChunk = 0;
/* Expat start-doctype callback (installed in idlChunkHandler only when
 * stripDoctype is off). Re-emits the declaration followed by a newline. A
 * NULL sysid or pubid is replaced by an empty string, and the PUBLIC form is
 * always written, even when no public id was supplied. hasinternal is not
 * used in the visible lines. */
470 static void XMLCALL doctypeHandler( void* userData,
471 const char* name, const char* sysid, const char* pubid, int hasinternal ) {
473 ap_filter_t* filter = (ap_filter_t*) userData;
474 char* s = (sysid) ? (char*) sysid : "";
475 char* p = (pubid) ? (char*) pubid : "";
476 _fwrite( filter, "<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", name, p, s );
/* Output filter entry point (registered as IDLCHUNK in idlChunkRegisterHook).
 * On each call it sets the response content type, re-parses the request
 * parameters to find the requested class ids, lazily creates the shared
 * expat parser and the filter context, and then feeds every data bucket of
 * the incoming brigade to the parser; the expat callbacks write the selected
 * output into ctx->brigade. An EOS or FLUSH bucket passes ctx->brigade on to
 * the next filter and frees the parser.
 * NOTE(review): many lines of this function (local declarations, closing
 * braces, return statements, any resetting of parser or mparams after they
 * are freed) are not visible in this listing, and the closing line of the
 * original comment just below is missing as well. */
480 /* The handler. Create a new parser and/or filter context where appropriate
481 * and parse the chunks of data received from the brigade
483 static int idlChunkHandler( ap_filter_t *f, apr_bucket_brigade *brigade ) {
485 idlChunkContext* ctx = f->ctx;
486 apr_bucket* currentBucket = NULL;
487 apr_pool_t* pool = f->r->pool;
490 osrfStringArray* params = NULL;
493 /* load the per-dir/location config */
494 idlChunkConfig* config = ap_get_module_config(
495 f->r->per_dir_config, &idlchunk_module );
/* NOTE(review): this informational dump is logged at APLOG_ERR on every
 * call, while the other diagnostics below use APLOG_DEBUG. */
497 ap_log_rerror(APLOG_MARK, APLOG_ERR,
498 0, f->r, "IDLCHUNK Config:\nContent Type = %s, "
499 "Strip PI = %s, Strip Comments = %s, Doctype = %s",
501 (config->stripPI) ? "yes" : "no",
502 (config->stripComments) ? "yes" : "no",
505 /* set the content type based on the config */
506 ap_set_content_type(f->r, config->contentType);
508 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Set content type");
/* Both arrays are rebuilt on every call and freed on every exit path that is
 * visible below. mparams is the file-scope array read by startElement. */
510 params = apacheParseParms(f->r); /* free me */
511 mparams = apacheGetParamValues( params, "class" ); /* free me */
/* At least one class id was requested: stop emitting everything. */
515 if (mparams && mparams->size > 0) all = 0;
517 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Parsed the params, if any");
519 /* create the XML parser */
521 if( parser == NULL ) {
523 parser = XML_ParserCreate("UTF-8");
/* The filter pointer becomes the userData argument of every callback. */
524 XML_SetUserData(parser, f);
525 XML_SetElementHandler(parser, startElement, endElement);
526 XML_SetCharacterDataHandler(parser, charHandler);
527 if(!config->stripDoctype)
528 XML_SetStartDoctypeDeclHandler( parser, doctypeHandler );
/* Presumably guarded by a stripPI test on a line not shown - TODO confirm. */
530 XML_SetProcessingInstructionHandler(parser, handlePI);
531 if(!config->stripComments)
532 XML_SetCommentHandler(parser, handleComment);
535 /* create the filter context */
/* Presumably done only when ctx is still NULL; the condition is not visible
 * in this listing. */
537 f->ctx = ctx = apr_pcalloc( pool, sizeof(*ctx));
538 ctx->brigade = apr_brigade_create( pool, f->c->bucket_alloc );
539 ctx->parser = parser;
543 if(firstrun) { /* we haven't started writing the data to the stream yet */
545 /* go ahead and write the doctype out if we have one defined */
546 if(config->doctype) {
547 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
548 0, f->r, "IDLCHUNK DOCTYPE => %s", config->doctype);
549 _fwrite(f, "%s\n", config->doctype);
554 /* cycle through the buckets in the brigade */
555 while (!APR_BRIGADE_EMPTY(brigade)) {
557 /* grab the next bucket */
558 currentBucket = APR_BRIGADE_FIRST(brigade);
560 /* clean up when we're done */
/* NOTE(review): a FLUSH bucket is handled exactly like EOS here, including
 * freeing the parser; confirm that a flush in the middle of a document is
 * not expected. The status returned by ap_pass_brigade() is not checked in
 * the visible lines. */
561 if (APR_BUCKET_IS_EOS(currentBucket) || APR_BUCKET_IS_FLUSH(currentBucket)) {
562 APR_BUCKET_REMOVE(currentBucket);
563 APR_BRIGADE_INSERT_TAIL(ctx->brigade, currentBucket);
564 ap_pass_brigade(f->next, ctx->brigade);
565 XML_ParserFree(parser);
566 if (params) osrfStringArrayFree(params);
567 if (mparams) osrfStringArrayFree(mparams);
572 /* read the incoming data */
/* NOTE(review): the read is non-blocking and any status other than
 * APR_SUCCESS is treated as an error; the status is held in an int. */
573 int s = apr_bucket_read(currentBucket, &data, &len, APR_NONBLOCK_READ);
574 if( s != APR_SUCCESS ) {
575 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
576 "IDLCHUNK error reading data from filter with status %d", s);
577 if (params) osrfStringArrayFree(params);
578 if (mparams) osrfStringArrayFree(mparams);
584 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
585 0, f->r, "IDLCHUNK read %d bytes", (int)len);
587 /* push data into the XML push parser */
/* The last argument (isFinal) is 0; this is the only XML_Parse call visible
 * in the listing. */
588 if ( XML_Parse(ctx->parser, data, len, 0) == XML_STATUS_ERROR ) {
/* tmp is declared on a line not shown; presumably a len + 1 byte copy of the
 * chunk, NUL-terminated for logging - TODO confirm. */
591 memcpy(tmp, data, len);
594 /* log and die on XML errors */
595 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
596 "IDLCHUNK XML Parse Error: %s at line %d: parsing %s: data %s",
597 XML_ErrorString(XML_GetErrorCode(ctx->parser)),
598 (int) XML_GetCurrentLineNumber(ctx->parser), f->r->filename, tmp);
600 XML_ParserFree(parser);
601 if (params) osrfStringArrayFree(params);
602 if (mparams) osrfStringArrayFree(mparams);
/* NOTE(review): an HTTP status code is returned from an output filter here;
 * confirm that callers of the filter chain expect this rather than an
 * apr_status_t value. */
604 return HTTP_INTERNAL_SERVER_ERROR;
608 /* so a subrequest doesn't re-read this bucket */
609 apr_bucket_delete(currentBucket);
/* Brigade fully consumed without an EOS or FLUSH bucket: release the
 * per-call state. The final return is not visible in this listing. */
612 apr_brigade_destroy(brigade);
613 if (params) osrfStringArrayFree(params);
614 if (mparams) osrfStringArrayFree(mparams);
619 /* Register the filter function as a filter for modifying the HTTP body (content) */
/* Registers idlChunkHandler as an output filter under the name IDLCHUNK,
 * with no init function (NULL) and filter type AP_FTYPE_CONTENT_SET. The
 * pool argument is not used in the visible lines. */
620 static void idlChunkRegisterHook(apr_pool_t *pool) {
621 ap_register_output_filter("IDLCHUNK", idlChunkHandler, NULL, AP_FTYPE_CONTENT_SET);
624 /* Define the module data */
/* Module record: supplies the per-directory config creator, the directive
 * table and the hook-registration function. The server-config slots and the
 * dir-merger slot are left NULL. The closing brace is not visible in this
 * listing. */
625 module AP_MODULE_DECLARE_DATA idlchunk_module = {
626 STANDARD20_MODULE_STUFF,
627 idlChunkCreateDirConfig, /* dir config creator */
628 NULL, /* dir merger --- default is to override */
629 NULL, /* server config */
630 NULL, /* merge server config */
631 idlChunkCommands, /* command apr_table_t */
632 idlChunkRegisterHook /* register hook */