4 #include "http_config.h"
6 #include "http_protocol.h"
7 #include "http_request.h"
8 //#include "apr_compat.h"
9 #include "apr_strings.h"
10 #include "apr_reslist.h"
12 #include "util_filter.h"
13 #include "opensrf/string_array.h"
14 #include "opensrf/utils.h"
15 #include "opensrf/log.h"
17 #include <sys/types.h>
/* Size cap, in bytes, that apacheParseParms compares against the amount of
 * request data accumulated while reading a POST body. */
21 #define APACHE_TOOLS_MAX_POST_SIZE 10485760 /* 10 MB */
22 #define MODULE_NAME "idlchunk_module"
24 /* Configuration directive names (registered in idlChunkCommands) and the default content type */
25 #define MODIDLCHUNK_CONFIG_STRIP_COMMENTS "IDLChunkStripComments"
26 #define MODIDLCHUNK_CONFIG_CONTENT_TYPE "IDLChunkContentType"
27 #define MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT "text/html"
28 #define MODIDLCHUNK_CONFIG_STRIP_PI "IDLChunkStripPI"
29 #define MODIDLCHUNK_CONFIG_DOCTYPE "IDLChunkDoctype"
30 #define MODIDLCHUNK_CONFIG_STRIP_DOCTYPE "IDLChunkStripDoctype"
31 #define MODIDLCHUNK_CONFIG_ESCAPE_SCRIPT "IDLChunkEscapeScript"
/* Forward declaration so the callbacks below can pass &idlchunk_module to
 * ap_get_module_config(); the record itself is defined further down. */
33 module AP_MODULE_DECLARE_DATA idlchunk_module;
/* File-wide state shared between idlChunkHandler and the expat callbacks.
 * idlChunkInScript is set by startElement when a "script" element opens. */
35 int idlChunkInScript = 0; /* are we in the middle of a <script> tag */
/* Values of the "class" request parameter; assigned in idlChunkHandler and
 * consulted by startElement. */
36 osrfStringArray* mparams = NULL;
/* Members of the per-filter context (idlChunkContext). idlChunkHandler
 * allocates the context from the request pool and fills in both members.
 * NOTE(review): the enclosing struct declaration is not among the visible lines. */
43 apr_bucket_brigade* brigade; /* the bucket brigade we buffer our data into */
44 XML_Parser parser; /* our XML parser */
/* Members of the per-directory configuration (idlChunkConfig), populated by
 * idlChunkCreateDirConfig and the idlChunkSet* directive handlers.
 * NOTE(review): the enclosing struct declaration and the stripDoctype member
 * (used by idlChunkSetStripDoctype) are not among the visible lines. */
49 int stripComments; /* should we strip comments on the way out? */
50 int stripPI; /* should we strip processing instructions on the way out? */
52 int escapeScript; /* if true, we html-escape anything text inside a <script> tag */
53 char* contentType; /* the content type used to serve pages */
54 char* doctype; /* the doctype header to send before any other data */
/* Gathers the request's parameters into a flat key/value string array.
 *
 * The URL query string (r->args) and, for POST requests, the body read with
 * ap_get_client_block() are accumulated in a growing_buffer, copied into the
 * request pool, and then split on '&' and '=' with ap_getword().  Each key and
 * value is URL-unescaped in place before being appended to the array.
 *
 * r: the current request; a NULL request yields NULL.
 *
 * NOTE(review): the declarations of arg, body, bread and sanity, the bodies of
 * the error branches, the buffer cleanup and the final return are not among
 * the visible lines, so the exact return values cannot be confirmed here.
 */
58 static osrfStringArray* apacheParseParms(request_rec* r) {
60 if( r == NULL ) return NULL;
61 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "got a valid request_rec");
64 apr_pool_t *p = r->pool; /* memory pool */
65 growing_buffer* buffer = buffer_init(1025);
67 /* gather the post args and append them to the url query string */
68 if( !strcmp(r->method,"POST") ) {
/* the status returned by ap_setup_client_block() is not examined */
70 ap_setup_client_block(r, REQUEST_CHUNKED_DECHUNK);
72 //osrfLogDebug(OSRF_LOG_MARK, "gateway reading post data..");
73 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "idlchunk reading post data..");
75 if(ap_should_client_block(r)) {
78 /* Start with url query string, if any */
80 if(r->args && r->args[0])
81 buffer_add(buffer, r->args);
85 //osrfLogDebug(OSRF_LOG_MARK, "gateway client has post data, reading...");
87 /* Append POST data */
/* each pass requests at most sizeof(body) - 1 bytes; the loop stops when
 * ap_get_client_block() returns 0 */
90 while( (bread = ap_get_client_block(r, body, sizeof(body) - 1)) ) {
93 //osrfLogInfo(OSRF_LOG_MARK,
94 // "ap_get_client_block(): returned error, exiting POST reader");
99 buffer_add( buffer, body );
101 //osrfLogDebug(OSRF_LOG_MARK,
102 // "gateway read %ld bytes: %d bytes of data so far", bread, buffer->n_used);
/* guard against oversized request bodies */
104 if(buffer->n_used > APACHE_TOOLS_MAX_POST_SIZE) {
105 //osrfLogError(OSRF_LOG_MARK, "gateway received POST larger "
106 // "than %d bytes. dropping request", APACHE_TOOLS_MAX_POST_SIZE);
112 //osrfLogDebug(OSRF_LOG_MARK, "gateway done reading post data");
/* NOTE(review): presumably the non-POST branch; its opening line is not visible */
117 if(r->args && r->args[0])
118 buffer_add(buffer, r->args);
119 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "idlchunk read GET data..");
/* copy the collected text into the request pool so it can be tokenized */
123 if(buffer->n_used > 0)
124 arg = apr_pstrdup(p, buffer->buf);
129 if( !arg || !arg[0] ) { /* we received no request */
133 //osrfLogDebug(OSRF_LOG_MARK, "parsing URL params from post/get request data: %s", arg);
134 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "parsing URL params from post/get request data: %s", arg);
136 osrfStringArray* sarray = osrfNewStringArray(12); /* method parameters */
138 char* key = NULL; /* query item name */
139 char* val = NULL; /* query item value */
141 /* Parse the post/get request data into a series of name/value pairs. */
142 /* Load each name into an even-numbered slot of an osrfStringArray, and */
143 /* the corresponding value into the following odd-numbered slot. */
/* val first receives the next '&'-delimited item; the second ap_getword()
 * call then peels the text before '=' off into key, leaving the rest in val */
145 while( arg && (val = ap_getword(p, (const char**) &arg, '&'))) {
147 key = ap_getword(r->pool, (const char**) &val, '=');
151 ap_unescape_url(key);
152 ap_unescape_url(val);
154 //osrfLogDebug(OSRF_LOG_MARK, "parsed URL params %s=%s", key, val);
156 osrfStringArrayAdd(sarray, key);
157 osrfStringArrayAdd(sarray, val);
/* runaway-loop guard: the array is released once the counter exceeds 1000 */
159 if( sanity++ > 1000 ) {
160 //osrfLogError(OSRF_LOG_MARK,
161 // "Parsing URL params failed sanity check: 1000 iterations");
162 osrfStringArrayFree(sarray);
168 //osrfLogDebug(OSRF_LOG_MARK,
169 // "Apache tools parsed %d params key/values", sarray->size / 2 );
/* Builds a new string array from every other entry of params, starting at
 * index 0.  The index is incremented both inside the loop body and in the
 * for header, so the loop advances two slots per pass and skips the entries
 * in between.
 *
 * params: flat array to read; NULL yields NULL.
 *
 * NOTE(review): the declaration of i and the return statement are not among
 * the visible lines.
 */
176 static osrfStringArray* apacheGetParamKeys(osrfStringArray* params) {
177 if(params == NULL) return NULL;
178 osrfStringArray* sarray = osrfNewStringArray(12);
180 //osrfLogDebug(OSRF_LOG_MARK, "Fetching URL param keys");
181 for( i = 0; i < params->size; i++ )
182 osrfStringArrayAdd(sarray, osrfStringArrayGetString(params, i++));
/* Collects, into a new string array, the entry that follows each entry of
 * params equal to key.  Entries are examined two at a time: the one at the
 * current index is compared with key, and the next one is appended on a match.
 *
 * params: flat array to search; NULL yields NULL.
 * key:    name to match with strcmp(); NULL yields NULL.
 *
 * NOTE(review): the declaration of i and the return statement are not among
 * the visible lines.
 */
186 static osrfStringArray* apacheGetParamValues(osrfStringArray* params, char* key) {
188 if(params == NULL || key == NULL) return NULL;
189 osrfStringArray* sarray = osrfNewStringArray(12);
191 //osrfLogDebug(OSRF_LOG_MARK, "Fetching URL values for key %s", key);
193 for( i = 0; i < params->size; i++ ) {
/* i now points at the slot after nkey */
194 const char* nkey = osrfStringArrayGetString(params, i++);
195 if(nkey && !strcmp(nkey, key))
196 osrfStringArrayAdd(sarray, osrfStringArrayGetString(params, i));
/* Returns a strdup()'d copy of the entry that follows the first entry of
 * params equal to key; the copy is heap memory that the caller must free().
 *
 * params: flat array to search; NULL yields NULL.
 * key:    name to match with strcmp(); NULL yields NULL.
 *
 * NOTE(review): the result of osrfStringArrayGetString() is handed to
 * strdup() without a NULL check -- confirm it cannot be NULL when the match
 * is the last slot.  The declaration of i and the no-match return are not
 * among the visible lines.
 */
202 static char* apacheGetFirstParamValue(osrfStringArray* params, char* key) {
203 if(params == NULL || key == NULL) return NULL;
206 //osrfLogDebug(OSRF_LOG_MARK, "Fetching first URL value for key %s", key);
207 for( i = 0; i < params->size; i++ ) {
/* i now points at the slot after nkey */
208 const char* nkey = osrfStringArrayGetString(params, i++);
209 if(nkey && !strcmp(nkey, key))
210 return strdup(osrfStringArrayGetString(params, i));
/* Prints a printf-style message, followed by a newline, to stderr.
 * msg is the format string for the variadic arguments; the formatted text
 * is taken from VA_BUF after VA_LIST_TO_STRING(msg) has run.
 * NOTE(review): the return statement is not among the visible lines. */
217 static int apacheDebug( char* msg, ... ) {
218 VA_LIST_TO_STRING(msg);
219 fprintf(stderr, "%s\n", VA_BUF);
/* Prints a printf-style message, followed by a newline, to stderr and
 * returns HTTP_INTERNAL_SERVER_ERROR so callers can return the result
 * directly.  msg is the format string for the variadic arguments. */
225 static int apacheError( char* msg, ... ) {
226 VA_LIST_TO_STRING(msg);
227 fprintf(stderr, "%s\n", VA_BUF);
229 return HTTP_INTERNAL_SERVER_ERROR;
235 /* get the content type from the config */
/* Directive handler for IDLChunkContentType.
 * cfg is the idlChunkConfig for the enclosing section; arg is the directive's
 * single argument.  The pointer is stored as-is (no copy is made).
 * NOTE(review): the return statement is not among the visible lines. */
236 static const char* idlChunkSetContentType(cmd_parms *params, void *cfg, const char *arg) {
237 idlChunkConfig* config = (idlChunkConfig*) cfg;
238 config->contentType = (char*) arg;
243 /* get the strip PI flag from the config */
/* Directive handler for IDLChunkStripPI.
 * Sets config->stripPI to 1 when arg equals "yes" (case-insensitive) and to 0
 * for any other value, including NULL.
 * NOTE(review): the return statement is not among the visible lines. */
244 static const char* idlChunkSetStripPI(cmd_parms *params, void *cfg, const char *arg) {
245 idlChunkConfig* config = (idlChunkConfig*) cfg;
246 char* a = (char*) arg;
247 config->stripPI = (a && !strcasecmp(a, "yes")) ? 1 : 0;
251 /* Get the strip comments flag from the config */
/* Directive handler for IDLChunkStripComments.
 * Sets config->stripComments to 1 when arg equals "yes" (case-insensitive)
 * and to 0 for any other value, including NULL.
 * NOTE(review): the return statement is not among the visible lines. */
252 static const char* idlChunkSetStripComments(cmd_parms *params, void *cfg, const char *arg) {
253 idlChunkConfig* config = (idlChunkConfig*) cfg;
254 char* a = (char*) arg;
255 config->stripComments = (a && !strcasecmp(a, "yes")) ? 1 : 0;
/* Directive handler for IDLChunkEscapeScript.
 * Sets config->escapeScript to 1 when arg equals "yes" (case-insensitive)
 * and to 0 for any other value, including NULL.
 * NOTE(review): the return statement is not among the visible lines. */
259 static const char* idlChunkSetEscapeScript(cmd_parms *params, void *cfg, const char *arg) {
260 idlChunkConfig* config = (idlChunkConfig*) cfg;
261 char* a = (char*) arg;
262 config->escapeScript = (a && !strcasecmp(a, "yes")) ? 1 : 0;
/* Directive handler for IDLChunkStripDoctype.
 * Sets config->stripDoctype to 1 when arg equals "yes" (case-insensitive)
 * and to 0 for any other value, including NULL.
 * NOTE(review): the return statement is not among the visible lines. */
266 static const char* idlChunkSetStripDoctype(cmd_parms *params, void *cfg, const char *arg) {
267 idlChunkConfig* config = (idlChunkConfig*) cfg;
268 char* a = (char*) arg;
269 config->stripDoctype = (a && !strcasecmp(a, "yes")) ? 1 : 0;
274 /* Get the user defined doctype from the config */
/* Directive handler for IDLChunkDoctype.
 * Stores the directive's argument pointer as-is (no copy) in config->doctype;
 * idlChunkHandler later writes that string, followed by a newline, to the
 * output.
 * NOTE(review): the return statement is not among the visible lines. */
275 static const char* idlChunkSetDoctype(cmd_parms *params, void *cfg, const char *arg) {
276 idlChunkConfig* config = (idlChunkConfig*) cfg;
277 config->doctype = (char*) arg;
281 /* Tell apache how to set our config variables */
/* Directive table: six single-argument (TAKE1) directives, each limited to
 * ACCESS_CONF and mapped to the matching idlChunkSet* handler.
 * NOTE(review): the table terminator is not among the visible lines. */
282 static const command_rec idlChunkCommands[] = {
283 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_STRIP_COMMENTS,
284 idlChunkSetStripComments, NULL, ACCESS_CONF, "IDLCHUNK Strip Comments"),
285 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_CONTENT_TYPE,
286 idlChunkSetContentType, NULL, ACCESS_CONF, "IDLCHUNK Content Type"),
287 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_STRIP_PI,
288 idlChunkSetStripPI, NULL, ACCESS_CONF, "IDLCHUNK Strip XML Processing Instructions"),
289 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_DOCTYPE,
290 idlChunkSetDoctype, NULL, ACCESS_CONF, "IDLCHUNK Doctype Declaration"),
291 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_STRIP_DOCTYPE,
292 idlChunkSetStripDoctype, NULL, ACCESS_CONF, "IDLCHUNK Strip Doctype Declaration"),
293 AP_INIT_TAKE1( MODIDLCHUNK_CONFIG_ESCAPE_SCRIPT,
294 idlChunkSetEscapeScript, NULL, ACCESS_CONF, "IDLCHUNK Escape data in script tags"),
298 /* Creates a new config object */
/* Per-directory config constructor registered in the module record.
 * Allocates an idlChunkConfig from pool p and assigns the defaults shown
 * below: comments kept, doctype stripped, script text escaped, content type
 * "text/html", no custom doctype.  The dir argument is not used in the
 * visible lines.
 * NOTE(review): the memory comes from apr_palloc(), which does not zero it,
 * and no assignment to stripPI is among the visible lines -- confirm it is
 * initialized. */
299 static void* idlChunkCreateDirConfig( apr_pool_t* p, char* dir ) {
300 idlChunkConfig* config =
301 (idlChunkConfig*) apr_palloc( p, sizeof(idlChunkConfig) );
303 config->stripComments = 0;
305 config->stripDoctype = 1;
306 config->escapeScript = 1;
307 config->contentType = MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT;
308 config->doctype = NULL;
310 return (void*) config;
313 /* keep for a while in case we ever need it */
/* Directory-config merger: builds a zeroed idlChunkConfig in pool p and takes
 * each merged field from the child (overrides) when it is non-zero/non-NULL,
 * otherwise from the parent (base).  Only contentType and stripComments are
 * merged in the visible lines.
 * NOTE(review): the module record passes NULL as its dir merger, so this
 * function does not appear to be registered.  IDLCHUNK_INHERIT expands with a
 * trailing semicolon, so each use below ends in an extra empty statement. */
315 #define IDLCHUNK_INHERIT(p, c, f) ((c->f) ? c->f : p->f);
316 static void* idlChunkMergeDirConfig(apr_pool_t *p, void *base, void *overrides) {
317 idlChunkConfig* parent = base;
318 idlChunkConfig* child = overrides;
319 idlChunkConfig* newConf = (idlChunkConfig*) apr_pcalloc(p, sizeof(idlChunkConfig));
320 newConf->contentType = IDLCHUNK_INHERIT(parent, child, contentType);
321 newConf->stripComments = IDLCHUNK_INHERIT(parent, child, stripComments);
327 /* We need a global parser object because sub-requests, with different
328 * filter contexts, are parsing part of the same document.
329 * This means that this filter will only work in forked (non-threaded) environments.
330 * XXX Figure out how to share pointers/data across filters */
/* Created by idlChunkHandler when it is NULL and released there with
 * XML_ParserFree(). */
331 XML_Parser parser = NULL;
333 /* utility function which passes data to the next filter */
/* Formats a printf-style message and writes it to the next filter.
 * filter: the current filter; its ctx must be an idlChunkContext whose
 *         brigade buffers the output.
 * data:   format string for the variadic arguments.
 * Does nothing when filter or data is NULL.  The formatted text is taken from
 * VA_BUF after VA_LIST_TO_STRING(data) and written with ap_fwrite(); the
 * status returned by ap_fwrite() is not examined. */
334 static void _fwrite( ap_filter_t* filter, char* data, ... ) {
335 if(!(filter && data)) return;
336 idlChunkContext* ctx = (idlChunkContext*) filter->ctx;
337 VA_LIST_TO_STRING(data);
338 ap_fwrite( filter->next, ctx->brigade, VA_BUF, strlen(VA_BUF));
342 /** XXX move me to opensrf/utils.h */
/* Walks str up to its NUL terminator.
 * NOTE(review): judging by the name and the calls in printAttr this
 * presumably replaces every character o with n in place, but the replacement
 * logic is not among the visible lines -- confirm. */
343 #define OSRF_UTILS_REPLACE_CHAR(str, o, n)\
346 while(str[i] != '\0') {\
353 /* cycles through the attributes attached to an element */
/* Returns the value of the attribute named "id", or NULL when atts is NULL.
 * atts is read as consecutive name/value entries (atts[i], atts[i+1]) and the
 * scan stops at the first NULL entry.  The returned pointer refers to the
 * attribute array itself; nothing is copied.
 * NOTE(review): the for header advances i by one only; whether a further
 * increment keeps the scan aligned on name slots, the declaration of i, and
 * the not-found return are not among the visible lines. */
354 static char* find_id_attr( const char** atts ) {
355 if(!atts) return NULL;
357 for( i = 0; atts[i] && atts[i+1]; i++ ) {
358 const char* name = atts[i];
359 char* value = (char*)atts[i+1];
360 if (!strcmp(name,"id")) return value;
364 /* In case we don't find anything to return */
368 /* cycles through the attributes attached to an element */
/* Writes each attribute of an element to the output as " name=value".
 * Every value is HTML-escaped with ap_escape_html() using the request pool.
 * A value containing a single quote is written inside double quotes; the
 * other _fwrite() form writes the value inside single quotes.
 * NOTE(review): the declaration of i, any NULL check on atts, the branch
 * separating the two _fwrite() calls and the loop's pair-stepping are not
 * among the visible lines. */
369 static void printAttr( ap_filter_t* filter, const char** atts ) {
372 for( i = 0; atts[i] && atts[i+1]; i++ ) {
373 const char* name = atts[i];
374 const char* value = atts[i+1];
375 char* escaped = ap_escape_html(filter->r->pool, value);
377 /* we make a big assumption here that if the string contains a ',
378 * then the original attribute was wrapped in "s - so recreate that */
379 if( strchr( escaped, '\'' ) ) {
380 OSRF_UTILS_REPLACE_CHAR(escaped,'"','\'');
381 _fwrite( filter, " %s=\"%s\"", name, escaped );
384 OSRF_UTILS_REPLACE_CHAR(escaped,'\'','"');
385 _fwrite( filter, " %s='%s'", name, escaped );
392 /* Starts an XML element */
/* expat start-tag callback; userData is the ap_filter_t registered with
 * XML_SetUserData().
 * For a "class" element, the element's "id" attribute is looked up in the
 * requested class list (mparams).  When output is enabled (all, inChunk, or
 * the element is "IDL") the tag and its attributes are written out; the tag
 * is terminated with " />" or ">" depending on whether the first 9
 * characters of the configured content type match the default "text/html".
 * Opening a "script" element sets idlChunkInScript.
 * NOTE(review): name is passed to strcmp() before the "name &&" NULL guard
 * further down.  The statement executed when the class is found and the
 * branch separating the two tag terminators are not among the visible lines. */
393 static void XMLCALL startElement(void *userData, const char *name, const char **atts) {
395 ap_filter_t* filter = (ap_filter_t*) userData;
398 if (!strcmp(name,"class")) {
399 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, filter->r,"Looking at %s with id of %s",name, find_id_attr(atts));
401 if (osrfStringArrayContains(mparams, find_id_attr(atts))) {
403 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, filter->r,"Found desired class %s", find_id_attr(atts));
407 if (all || inChunk || (name && (!strcmp(name,"IDL")))) {
409 idlChunkConfig* config = ap_get_module_config(
410 filter->r->per_dir_config, &idlchunk_module );
411 _fwrite(filter, "<%s", name );
412 printAttr( filter, atts );
413 if (!strncmp(config->contentType, MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT, 9)) {
414 _fwrite(filter, " />", name );
416 _fwrite(filter, ">", name );
418 if(!strcmp(name, "script"))
419 idlChunkInScript = 1;
423 /* Handles the character data */
/* expat character-data callback; s is not NUL-terminated, so len bytes are
 * copied into a zero-filled local buffer first.
 * Text is written only when output is enabled (all or inChunk).  Inside a
 * script element with escapeScript disabled the text is written as-is;
 * the other form HTML-escapes it with ap_escape_html() first.
 * NOTE(review): the declaration (and size) of data and the branch separating
 * the two writes are not among the visible lines. */
424 static void XMLCALL charHandler( void* userData, const XML_Char* s, int len ) {
425 ap_filter_t* filter = (ap_filter_t*) userData;
427 memset( data, '\0', sizeof(data) );
428 memcpy( data, s, len );
430 idlChunkConfig* config = ap_get_module_config(
431 filter->r->per_dir_config, &idlchunk_module );
433 if (all || inChunk) {
434 if( idlChunkInScript && ! config->escapeScript ) {
435 _fwrite( filter, "%s", data );
438 char* escaped = ap_escape_html(filter->r->pool, data);
439 _fwrite( filter, "%s", escaped );
/* expat processing-instruction callback: re-emits the instruction to the
 * output as "<?target data?>".  userData is the ap_filter_t registered with
 * XML_SetUserData(). */
444 static void XMLCALL handlePI( void* userData, const XML_Char* target, const XML_Char* data) {
445 ap_filter_t* filter = (ap_filter_t*) userData;
446 _fwrite(filter, "<?%s %s?>", target, data);
/* expat comment callback: re-emits the comment text to the output wrapped in
 * "<!-- " and " -->".  userData is the ap_filter_t registered with
 * XML_SetUserData(). */
449 static void XMLCALL handleComment( void* userData, const XML_Char* comment ) {
450 ap_filter_t* filter = (ap_filter_t*) userData;
451 _fwrite(filter, "<!-- %s -->", comment);
454 /* Ends an XML element */
/* expat end-tag callback; userData is the ap_filter_t registered with
 * XML_SetUserData().
 * When output is enabled (all, inChunk, or the element is "IDL") the closing
 * tag is written, subject to the content-type check below.  Closing a
 * "script" element clears idlChunkInScript, and closing a "class" element
 * clears inChunk.
 * NOTE(review): the statement guarded by the content-type check is not among
 * the visible lines. */
455 static void XMLCALL endElement(void *userData, const char *name) {
457 if (all || inChunk || (name && (!strcmp(name,"IDL")))) {
459 ap_filter_t* filter = (ap_filter_t*) userData;
460 idlChunkConfig* config = ap_get_module_config(
461 filter->r->per_dir_config, &idlchunk_module );
462 if (!strncmp(config->contentType, MODIDLCHUNK_CONFIG_CONTENT_TYPE_DEFAULT, 9)) {
465 _fwrite( filter, "</%s>", name );
/* leaving the script element: reset the flag that startElement set, so text
 * after the element is no longer treated as script content */
466 if(!strcmp(name, "script"))
467 idlChunkInScript = 0;
470 if (!strcmp(name,"class")) inChunk = 0;
/* expat start-doctype callback: writes a DOCTYPE declaration for name to the
 * output.  The declaration is always emitted in the PUBLIC form, with the
 * public id followed by the system id; a NULL id is written as an empty
 * string.  hasinternal is not used in the visible lines. */
473 static void XMLCALL doctypeHandler( void* userData,
474 const char* name, const char* sysid, const char* pubid, int hasinternal ) {
476 ap_filter_t* filter = (ap_filter_t*) userData;
477 char* s = (sysid) ? (char*) sysid : "";
478 char* p = (pubid) ? (char*) pubid : "";
479 _fwrite( filter, "<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", name, p, s );
/* Output filter entry point (registered under the name "IDLCHUNK").
 * f:       this filter; f->ctx holds the idlChunkContext between calls.
 * brigade: the incoming bucket brigade to parse.
 * NOTE(review): several declarations (all, firstrun, data, len, tmp), some
 * branch conditions and the success returns are not among the visible lines. */
483 /* The handler. Create a new parser and/or filter context where appropriate
484 * and parse the chunks of data received from the brigade
486 static int idlChunkHandler( ap_filter_t *f, apr_bucket_brigade *brigade ) {
488 idlChunkContext* ctx = f->ctx;
489 apr_bucket* currentBucket = NULL;
490 apr_pool_t* pool = f->r->pool;
493 osrfStringArray* params = NULL;
496 /* load the per-dir/location config */
497 idlChunkConfig* config = ap_get_module_config(
498 f->r->per_dir_config, &idlchunk_module );
500 ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
501 0, f->r, "IDLCHUNK Config:\nContent Type = %s, "
502 "Strip PI = %s, Strip Comments = %s, Doctype = %s",
504 (config->stripPI) ? "yes" : "no",
505 (config->stripComments) ? "yes" : "no",
508 /* set the content type based on the config */
509 ap_set_content_type(f->r, config->contentType);
511 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Set content type");
/* the request parameters are re-parsed on every invocation; the "class"
 * values go into the file-level mparams used by startElement */
513 params = apacheParseParms(f->r); /* free me */
514 mparams = apacheGetParamValues( params, "class" ); /* free me */
/* at least one class was requested, so do not emit the whole document */
518 if (mparams && mparams->size > 0) all = 0;
520 //ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Parsed the params, if any");
522 /* create the XML parser */
524 if( parser == NULL ) {
526 parser = XML_ParserCreate("UTF-8");
/* the filter itself is the user data handed to every expat callback */
527 XML_SetUserData(parser, f);
528 XML_SetElementHandler(parser, startElement, endElement);
529 XML_SetCharacterDataHandler(parser, charHandler);
/* the doctype and comment handlers are installed only when the matching
 * strip option is off */
530 if(!config->stripDoctype)
531 XML_SetStartDoctypeDeclHandler( parser, doctypeHandler );
533 XML_SetProcessingInstructionHandler(parser, handlePI);
534 if(!config->stripComments)
535 XML_SetCommentHandler(parser, handleComment);
538 /* create the filter context */
540 f->ctx = ctx = apr_pcalloc( pool, sizeof(*ctx));
541 ctx->brigade = apr_brigade_create( pool, f->c->bucket_alloc );
542 ctx->parser = parser;
546 if(firstrun) { /* we haven't started writing the data to the stream yet */
548 /* go ahead and write the doctype out if we have one defined */
549 if(config->doctype) {
550 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
551 0, f->r, "IDLCHUNK DOCTYPE => %s", config->doctype);
552 _fwrite(f, "%s\n", config->doctype);
557 /* cycle through the buckets in the brigade */
558 while (!APR_BRIGADE_EMPTY(brigade)) {
560 /* grab the next bucket */
561 currentBucket = APR_BRIGADE_FIRST(brigade);
563 /* clean up when we're done */
/* EOS and FLUSH are handled alike: the bucket is moved to our brigade, the
 * brigade is passed downstream, and the parser and parameter arrays are freed.
 * NOTE(review): confirm the global parser is reset after XML_ParserFree(). */
564 if (APR_BUCKET_IS_EOS(currentBucket) || APR_BUCKET_IS_FLUSH(currentBucket)) {
565 APR_BUCKET_REMOVE(currentBucket);
566 APR_BRIGADE_INSERT_TAIL(ctx->brigade, currentBucket);
567 ap_pass_brigade(f->next, ctx->brigade);
568 XML_ParserFree(parser);
569 if (params) osrfStringArrayFree(params);
570 if (mparams) osrfStringArrayFree(mparams);
575 /* read the incoming data */
576 int s = apr_bucket_read(currentBucket, &data, &len, APR_NONBLOCK_READ);
577 if( s != APR_SUCCESS ) {
578 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
579 "IDLCHUNK error reading data from filter with status %d", s);
580 if (params) osrfStringArrayFree(params);
581 if (mparams) osrfStringArrayFree(mparams);
587 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
588 0, f->r, "IDLCHUNK read %d bytes", (int)len);
590 /* push data into the XML push parser */
/* the final-chunk flag is always 0 here */
591 if ( XML_Parse(ctx->parser, data, len, 0) == XML_STATUS_ERROR ) {
/* copy of the offending chunk, used only for the log message below */
594 memcpy(tmp, data, len);
597 /* log and die on XML errors */
598 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
599 "IDLCHUNK XML Parse Error: %s at line %d: parsing %s: data %s",
600 XML_ErrorString(XML_GetErrorCode(ctx->parser)),
601 (int) XML_GetCurrentLineNumber(ctx->parser), f->r->filename, tmp);
603 XML_ParserFree(parser);
604 if (params) osrfStringArrayFree(params);
605 if (mparams) osrfStringArrayFree(mparams);
607 return HTTP_INTERNAL_SERVER_ERROR;
611 /* so a subrequest doesn't re-read this bucket */
612 apr_bucket_delete(currentBucket);
/* all buckets consumed: drop the incoming brigade and this call's parameters */
615 apr_brigade_destroy(brigade);
616 if (params) osrfStringArrayFree(params);
617 if (mparams) osrfStringArrayFree(mparams);
622 /* Register the filter function as a filter for modifying the HTTP body (content) */
/* Hook-registration callback referenced from the module record.  Registers
 * idlChunkHandler as the output filter named "IDLCHUNK", with no init
 * function, at type AP_FTYPE_CONTENT_SET.  The pool argument is not used. */
623 static void idlChunkRegisterHook(apr_pool_t *pool) {
624 ap_register_output_filter("IDLCHUNK", idlChunkHandler, NULL, AP_FTYPE_CONTENT_SET);
627 /* Define the module data */
/* Module record: supplies a per-directory config constructor, the directive
 * table and the hook-registration function.  No directory merger and no
 * server-level config callbacks are provided. */
628 module AP_MODULE_DECLARE_DATA idlchunk_module = {
629 STANDARD20_MODULE_STUFF,
630 idlChunkCreateDirConfig, /* dir config creator */
631 NULL, /* dir merger --- default is to override */
632 NULL, /* server config */
633 NULL, /* merge server config */
634 idlChunkCommands, /* command apr_table_t */
635 idlChunkRegisterHook /* register hook */