4 #include "http_config.h"
6 #include "http_protocol.h"
7 #include "http_request.h"
8 //#include "apr_compat.h"
9 #include "apr_strings.h"
10 #include "apr_reslist.h"
12 #include "util_filter.h"
13 #include "opensrf/utils.h"
15 #include <sys/types.h>
19 #define MODULE_NAME "xmlent_module"
21 /* Names of the configuration directives understood by this module (each is
 * bound to its setter function in xmlEntCommands), plus the content type
 * that is used when XMLEntContentType is not set. */
22 #define MODXMLENT_CONFIG_STRIP_COMMENTS "XMLEntStripComments"
23 #define MODXMLENT_CONFIG_CONTENT_TYPE "XMLEntContentType"
24 #define MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT "text/html"
25 #define MODXMLENT_CONFIG_STRIP_PI "XMLEntStripPI"
26 #define MODXMLENT_CONFIG_DOCTYPE "XMLEntDoctype"
27 #define MODXMLENT_CONFIG_STRIP_DOCTYPE "XMLEntStripDoctype"
28 #define MODXMLENT_CONFIG_ESCAPE_SCRIPT "XMLEntEscapeScript"
/* Forward declaration so the callbacks below can pass &xmlent_module to
 * ap_get_module_config(); the module record itself is defined further down. */
30 module AP_MODULE_DECLARE_DATA xmlent_module;
/* File-scope flag (not stored in the filter context); charHandler consults it
 * when deciding whether text must be HTML-escaped. */
32 int xmlEntInScript = 0; /* are we in the middle of a <script> tag */
36 apr_bucket_brigade* brigade; /* the bucket brigade we buffer our data into */
37 XML_Parser parser; /* our XML parser */
42 int stripComments; /* should we strip comments on the way out? */
43 int stripPI; /* should we strip processing instructions on the way out? */
45 int escapeScript; /* if true, we html-escape any text inside a <script> tag */
46 char* contentType; /* the content type used to serve pages */
47 char* doctype; /* the doctype header to send before any other data */
50 /* Check to see if this is an empty XHTML element: `element` is compared
 * (case-sensitively, via strcmp) against the entries of emptyTags in turn. */
51 static int isEmptyElement(const char *element) {
52 /* derived from "grep EMPTY xhtml1-transitional.dtd" */
53 static char *emptyTags[] = {
/* Stop at the first match, or when a null entry is read (p == 0).
 * The table is presumably NULL-terminated; its entries are outside this
 * excerpt -- TODO confirm. */
75 while (!isEmpty && p != 0) {
76 isEmpty = !strcmp((const char*)element, (const char*)p);
77 p = *(emptyTags + ++i);
82 /* Directive handler for the content type setting: `cfg` is the module's
 * per-directory config and `arg` the value given in the configuration.
 * The argument pointer is stored as-is (const is cast away, no copy is made). */
83 static const char* xmlEntSetContentType(cmd_parms *params, void *cfg, const char *arg) {
84 xmlEntConfig* config = (xmlEntConfig*) cfg;
85 config->contentType = (char*) arg;
90 /* Directive handler for the strip-PI flag. The flag is switched on only
 * when the argument equals "yes" (compared case-insensitively); any other
 * value, or a NULL argument, switches it off. */
91 static const char* xmlEntSetStripPI(cmd_parms *params, void *cfg, const char *arg) {
92 xmlEntConfig* config = (xmlEntConfig*) cfg;
93 char* a = (char*) arg;
94 config->stripPI = (a && !strcasecmp(a, "yes")) ? 1 : 0;
98 /* Directive handler for the strip-comments flag. The flag is switched on
 * only when the argument equals "yes" (compared case-insensitively); any
 * other value, or a NULL argument, switches it off. */
99 static const char* xmlEntSetStripComments(cmd_parms *params, void *cfg, const char *arg) {
100 xmlEntConfig* config = (xmlEntConfig*) cfg;
101 char* a = (char*) arg;
102 config->stripComments = (a && !strcasecmp(a, "yes")) ? 1 : 0;
/* Directive handler for the escape-script flag. The flag is switched on only
 * when the argument equals "yes" (compared case-insensitively); any other
 * value, or a NULL argument, switches it off. */
106 static const char* xmlEntSetEscapeScript(cmd_parms *params, void *cfg, const char *arg) {
107 xmlEntConfig* config = (xmlEntConfig*) cfg;
108 char* a = (char*) arg;
109 config->escapeScript = (a && !strcasecmp(a, "yes")) ? 1 : 0;
/* Directive handler for the strip-doctype flag. The flag is switched on only
 * when the argument equals "yes" (compared case-insensitively); any other
 * value, or a NULL argument, switches it off. */
113 static const char* xmlEntSetStripDoctype(cmd_parms *params, void *cfg, const char *arg) {
114 xmlEntConfig* config = (xmlEntConfig*) cfg;
115 char* a = (char*) arg;
116 config->stripDoctype = (a && !strcasecmp(a, "yes")) ? 1 : 0;
121 /* Directive handler for the user defined doctype: stores the argument from
 * the config in the per-directory config. The pointer is kept as-is (const
 * is cast away, no copy is made). */
122 static const char* xmlEntSetDoctype(cmd_parms *params, void *cfg, const char *arg) {
123 xmlEntConfig* config = (xmlEntConfig*) cfg;
124 config->doctype = (char*) arg;
128 /* Tell apache how to set our config variables. Each entry binds one
 * directive name to its setter; all of them are declared with AP_INIT_TAKE1
 * (a single argument) and ACCESS_CONF, and carry a short description string. */
129 static const command_rec xmlEntCommands[] = {
130 AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_COMMENTS,
131 xmlEntSetStripComments, NULL, ACCESS_CONF, "XMLENT Strip Comments"),
132 AP_INIT_TAKE1( MODXMLENT_CONFIG_CONTENT_TYPE,
133 xmlEntSetContentType, NULL, ACCESS_CONF, "XMLENT Content Type"),
134 AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_PI,
135 xmlEntSetStripPI, NULL, ACCESS_CONF, "XMLENT Strip XML Processing Instructions"),
136 AP_INIT_TAKE1( MODXMLENT_CONFIG_DOCTYPE,
137 xmlEntSetDoctype, NULL, ACCESS_CONF, "XMLENT Doctype Declaration"),
138 AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_DOCTYPE,
139 xmlEntSetStripDoctype, NULL, ACCESS_CONF, "XMLENT Strip Doctype Declaration"),
140 AP_INIT_TAKE1( MODXMLENT_CONFIG_ESCAPE_SCRIPT,
141 xmlEntSetEscapeScript, NULL, ACCESS_CONF, "XMLENT Escape data in script tags"),
145 /* Creates a new per-directory config object, allocated from pool `p`, and
 * fills in the defaults: comments and the doctype are kept, text inside
 * <script> is escaped, the content type is
 * MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT and no custom doctype is set. */
146 static void* xmlEntCreateDirConfig( apr_pool_t* p, char* dir ) {
147 xmlEntConfig* config =
148 (xmlEntConfig*) apr_palloc( p, sizeof(xmlEntConfig) );
/* NOTE(review): apr_palloc() does not zero the memory it returns, so every
 * field needs an explicit default. The stripPI assignment is not visible in
 * this excerpt -- confirm it is present. */
150 config->stripComments = 0;
152 config->stripDoctype = 0;
153 config->escapeScript = 1;
154 config->contentType = MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT;
155 config->doctype = NULL;
157 return (void*) config;
160 /* keep for a while in case we ever need it */
162 #define XMLENT_INHERIT(p, c, f) ((c->f) ? c->f : p->f);
163 static void* xmlEntMergeDirConfig(apr_pool_t *p, void *base, void *overrides) {
164 xmlEntConfig* parent = base;
165 xmlEntConfig* child = overrides;
166 xmlEntConfig* newConf = (xmlEntConfig*) apr_pcalloc(p, sizeof(xmlEntConfig));
167 newConf->contentType = XMLENT_INHERIT(parent, child, contentType);
168 newConf->stripComments = XMLENT_INHERIT(parent, child, stripComments);
174 /* We need a global parser object because sub-requests, with different
175 * filter contexts, are parsing part of the same document.
176 * This means that this filter will only work in forked (non-threaded) environments.
177 * XXX Figure out how to share pointers/data across filters */
178 XML_Parser parser = NULL;
180 /* Utility function which passes data to the next filter. `data` and the
 * variadic arguments are handed to VA_LIST_TO_STRING, and the resulting
 * VA_BUF is written to filter->next through the brigade held in the filter
 * context. Does nothing when `filter` or `data` is NULL. */
181 static void _fwrite( ap_filter_t* filter, char* data, ... ) {
182 if(!(filter && data)) return;
/* NOTE(review): the context is dereferenced below without a NULL check, so
 * filter->ctx must already have been set up when this is called. */
183 xmlEntContext* ctx = (xmlEntContext*) filter->ctx;
/* presumably formats `data` printf-style into VA_BUF (macro not defined in
 * this file, likely from opensrf/utils.h) -- TODO confirm */
184 VA_LIST_TO_STRING(data);
/* the status returned by ap_fwrite() is not checked */
185 ap_fwrite( filter->next, ctx->brigade, VA_BUF, strlen(VA_BUF));
189 /** XXX move me to opensrf/utils.h */
190 #define OSRF_UTILS_REPLACE_CHAR(str, o, n)\
193 while(str[i] != '\0') {\
200 /* Cycles through the attributes attached to an element and writes each one
 * to the output as ` name=value`, with the value HTML-escaped and quoted.
 * `atts` is read as consecutive name/value entries; the walk stops at the
 * first NULL name or NULL value. */
201 static void printAttr( ap_filter_t* filter, const char** atts ) {
/* NOTE(review): the loop header advances i by one while each attribute uses
 * two slots; the extra step past the value is not visible in this excerpt --
 * confirm it is done inside the loop body. */
204 for( i = 0; atts[i] && atts[i+1]; i++ ) {
205 const char* name = atts[i];
206 const char* value = atts[i+1];
/* the escaped copy is allocated from the request pool */
207 char* escaped = ap_escape_html(filter->r->pool, value);
209 /* we make a big assumption here that if the string contains a ',
210 * then the original attribute was wrapped in "s - so recreate that */
211 if( strchr( escaped, '\'' ) ) {
212 OSRF_UTILS_REPLACE_CHAR(escaped,'"','\'');
213 _fwrite( filter, " %s=\"%s\"", name, escaped );
/* alternative path: the value is emitted wrapped in single quotes */
216 OSRF_UTILS_REPLACE_CHAR(escaped,'\'','"');
217 _fwrite( filter, " %s='%s'", name, escaped );
224 /* Starts an XML element: writes the opening tag name followed by its
 * attributes. `userData` is the ap_filter_t the output is written through. */
225 static void XMLCALL startElement(void *userData, const char *name, const char **atts) {
226 ap_filter_t* filter = (ap_filter_t*) userData;
227 xmlEntConfig* config = ap_get_module_config(
228 filter->r->per_dir_config, &xmlent_module );
229 _fwrite(filter, "<%s", name );
230 printAttr( filter, atts );
/* Only the first 9 characters of the configured content type are compared
 * with "text/html", so any value that starts with text/html matches. In
 * that case elements reported by isEmptyElement() get a self-closing tag. */
231 if (!strncmp(config->contentType, MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT, 9)
232 && isEmptyElement(name)) {
/* the trailing `name` argument has no matching conversion in these formats */
233 _fwrite(filter, " />", name );
235 _fwrite(filter, ">", name );
/* the statement guarded by this test is outside this excerpt; presumably it
 * raises xmlEntInScript -- TODO confirm */
237 if(!strcmp(name, "script"))
241 /* Handles the character data: the `len` bytes at `s` are copied into a
 * zero-filled local buffer and written to the output. The text is written
 * unescaped when we are inside a <script> element and escapeScript is off;
 * the other visible path HTML-escapes it first. */
242 static void XMLCALL charHandler( void* userData, const XML_Char* s, int len ) {
243 ap_filter_t* filter = (ap_filter_t*) userData;
/* NOTE(review): the declaration of `data` is not visible in this excerpt;
 * it must hold at least len + 1 bytes for the copy below to stay terminated
 * -- confirm. */
245 memset( data, '\0', sizeof(data) );
246 memcpy( data, s, len );
248 xmlEntConfig* config = ap_get_module_config(
249 filter->r->per_dir_config, &xmlent_module );
251 if( xmlEntInScript && ! config->escapeScript ) {
252 _fwrite( filter, "%s", data );
/* escaped copy is allocated from the request pool */
255 char* escaped = ap_escape_html(filter->r->pool, data);
256 _fwrite( filter, "%s", escaped );
/* Processing-instruction callback: re-emits the instruction as
 * <?target data?> through the filter passed in `userData`. Neither string is
 * escaped. */
260 static void XMLCALL handlePI( void* userData, const XML_Char* target, const XML_Char* data) {
261 ap_filter_t* filter = (ap_filter_t*) userData;
262 _fwrite(filter, "<?%s %s?>", target, data);
/* Comment callback: re-emits the comment text wrapped in <!-- and -->, with
 * one space added on each side, through the filter passed in `userData`. */
265 static void XMLCALL handleComment( void* userData, const XML_Char* comment ) {
266 ap_filter_t* filter = (ap_filter_t*) userData;
267 _fwrite(filter, "<!-- %s -->", comment);
270 /* Ends an XML element by writing its closing tag. `userData` is the
 * ap_filter_t the output is written through. */
271 static void XMLCALL endElement(void *userData, const char *name) {
272 ap_filter_t* filter = (ap_filter_t*) userData;
273 xmlEntConfig* config = ap_get_module_config(
274 filter->r->per_dir_config, &xmlent_module );
/* Same test startElement uses to decide on a self-closing tag. The body of
 * this branch is outside this excerpt; presumably it skips the closing tag
 * -- TODO confirm. */
275 if (!strncmp(config->contentType, MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT, 9)
276 && isEmptyElement(name)) {
279 _fwrite( filter, "</%s>", name );
/* the statement guarded by this test is outside this excerpt; presumably it
 * clears xmlEntInScript -- TODO confirm */
280 if(!strcmp(name, "script"))
/* Doctype callback: writes a <!DOCTYPE ...> line for the document's doctype
 * declaration. The PUBLIC form is always emitted, public id first and system
 * id second; an id that is NULL is written as an empty string. `hasinternal`
 * is not used in the visible lines. */
284 static void XMLCALL doctypeHandler( void* userData,
285 const char* name, const char* sysid, const char* pubid, int hasinternal ) {
287 ap_filter_t* filter = (ap_filter_t*) userData;
288 char* s = (sysid) ? (char*) sysid : "";
289 char* p = (pubid) ? (char*) pubid : "";
290 _fwrite( filter, "<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", name, p, s );
294 /* The handler. Create a new parser and/or filter context where appropriate
295 * and parse the chunks of data received from the brigade
297 static int xmlEntHandler( ap_filter_t *f, apr_bucket_brigade *brigade ) {
299 xmlEntContext* ctx = f->ctx;
300 apr_bucket* currentBucket = NULL;
301 apr_pool_t* pool = f->r->pool;
305 /* load the per-dir/location config */
306 xmlEntConfig* config = ap_get_module_config(
307 f->r->per_dir_config, &xmlent_module );
309 ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
310 0, f->r, "XMLENT Config:\nContent Type = %s, "
311 "Strip PI = %s, Strip Comments = %s, Doctype = %s",
313 (config->stripPI) ? "yes" : "no",
314 (config->stripComments) ? "yes" : "no",
317 /* set the content type based on the config */
318 ap_set_content_type(f->r, config->contentType);
321 /* create the XML parser */
/* `parser` is the file-scope parser, not a per-request object: it is created
 * and wired to the callbacks only while the global is NULL, with this filter
 * as the callbacks' user data. */
323 if( parser == NULL ) {
325 parser = XML_ParserCreate("UTF-8");
326 XML_SetUserData(parser, f);
327 XML_SetElementHandler(parser, startElement, endElement);
328 XML_SetCharacterDataHandler(parser, charHandler);
/* the doctype and comment callbacks are left unset when the matching strip
 * flag is on, so those constructs never reach the output */
329 if(!config->stripDoctype)
330 XML_SetStartDoctypeDeclHandler( parser, doctypeHandler );
/* NOTE(review): the line before this call is outside this excerpt;
 * presumably it is guarded by the stripPI flag -- confirm */
332 XML_SetProcessingInstructionHandler(parser, handlePI);
333 if(!config->stripComments)
334 XML_SetCommentHandler(parser, handleComment);
337 /* create the filter context */
/* the context and its output brigade are allocated from the request pool */
339 f->ctx = ctx = apr_pcalloc( pool, sizeof(*ctx));
340 ctx->brigade = apr_brigade_create( pool, f->c->bucket_alloc );
341 ctx->parser = parser;
345 if(firstrun) { /* we haven't started writing the data to the stream yet */
347 /* go ahead and write the doctype out if we have one defined */
348 if(config->doctype) {
349 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
350 0, f->r, "XMLENT DOCTYPE => %s", config->doctype);
351 _fwrite(f, "%s\n", config->doctype);
356 /* cycle through the buckets in the brigade */
357 while (!APR_BRIGADE_EMPTY(brigade)) {
359 /* grab the next bucket */
360 currentBucket = APR_BRIGADE_FIRST(brigade);
362 /* clean up when we're done */
/* NOTE(review): a FLUSH bucket takes the same path as EOS, including the
 * XML_ParserFree() call below -- confirm that is intended for flushes that
 * arrive before the end of the document. */
363 if (APR_BUCKET_IS_EOS(currentBucket) || APR_BUCKET_IS_FLUSH(currentBucket)) {
/* move the marker bucket onto our output brigade and send everything
 * buffered so far downstream; the status of ap_pass_brigade() is not checked */
364 APR_BUCKET_REMOVE(currentBucket);
365 APR_BRIGADE_INSERT_TAIL(ctx->brigade, currentBucket);
366 ap_pass_brigade(f->next, ctx->brigade);
/* NOTE(review): this frees the file-scope parser; whether the global is reset
 * to NULL afterwards is not visible in this excerpt -- confirm. */
367 XML_ParserFree(parser);
372 /* read the incoming data */
/* non-blocking read; any status other than APR_SUCCESS is logged as an error */
373 int s = apr_bucket_read(currentBucket, &data, &len, APR_NONBLOCK_READ);
374 if( s != APR_SUCCESS ) {
375 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
376 "XMLENT error reading data from filter with status %d", s);
382 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
383 0, f->r, "XMLENT read %d bytes", (int)len);
385 /* push data into the XML push parser */
/* every visible XML_Parse() call passes 0 as the final-chunk flag */
386 if ( XML_Parse(ctx->parser, data, len, 0) == XML_STATUS_ERROR ) {
/* NOTE(review): the declaration of `tmp` is not visible in this excerpt; it
 * must hold len + 1 bytes and be terminated before it is logged with %s --
 * confirm. */
389 memcpy(tmp, data, len);
392 /* log and die on XML errors */
393 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
394 "XMLENT XML Parse Error: %s at line %d: parsing %s: data %s",
395 XML_ErrorString(XML_GetErrorCode(ctx->parser)),
396 (int) XML_GetCurrentLineNumber(ctx->parser), f->r->filename, tmp);
398 XML_ParserFree(parser);
/* NOTE(review): an HTTP status code is returned from an output filter here;
 * confirm the caller treats it as a failure status. */
400 return HTTP_INTERNAL_SERVER_ERROR;
404 /* so a subrequest doesn't re-read this bucket */
405 apr_bucket_delete(currentBucket);
/* the incoming brigade is destroyed after its buckets have been handled */
408 apr_brigade_destroy(brigade);
413 /* Register the filter function as a filter for modifying the HTTP body
 * (content): xmlEntHandler is registered as an output filter under the name
 * "XMLENT" with filter type AP_FTYPE_CONTENT_SET and no init function.
 * `pool` is not used in the visible line. */
414 static void xmlEntRegisterHook(apr_pool_t *pool) {
415 ap_register_output_filter("XMLENT", xmlEntHandler, NULL, AP_FTYPE_CONTENT_SET);
418 /* Define the module data: per-directory config creation, the directive
 * table and the hook registration function are supplied; the merge and
 * server-config slots are left NULL. */
419 module AP_MODULE_DECLARE_DATA xmlent_module = {
420 STANDARD20_MODULE_STUFF,
421 xmlEntCreateDirConfig, /* dir config creator */
422 NULL, /* dir merger --- default is to override */
423 NULL, /* server config */
424 NULL, /* merge server config */
425 xmlEntCommands, /* directive table (command_rec array) */
426 xmlEntRegisterHook /* register hook */