2 #include "http_config.h"
4 #include "http_protocol.h"
5 #include "http_request.h"
6 #include "apr_compat.h"
7 #include "apr_strings.h"
8 #include "apr_reslist.h"
10 #include "util_filter.h"
11 #include "opensrf/utils.h"
13 #include <sys/types.h>
/* Name of this module as a string. */
17 #define MODULE_NAME "xmlent_module"
19 /* Configuration directive names (the strings registered in xmlEntCommands)
 * and the default values applied by xmlEntCreateDirConfig.  The two strip
 * flags are switched on only when their value compares equal to "yes",
 * ignoring case.  The doctype directive has no default string; the config
 * creator leaves the doctype as NULL. */
20 #define MODXMLENT_CONFIG_STRIP_COMMENTS "XMLEntStripComments"
21 #define MODXMLENT_CONFIG_STRIP_COMMENTS_DEFAULT "yes"
22 #define MODXMLENT_CONFIG_CONTENT_TYPE "XMLEntContentType"
23 #define MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT "text/html"
24 #define MODXMLENT_CONFIG_STRIP_PI "XMLEntStripPI"
25 #define MODXMLENT_CONFIG_STRIP_PI_DEFAULT "yes"
26 #define MODXMLENT_CONFIG_DOCTYPE "XMLEntDoctype"
/* XHTML 1.0 Transitional doctype text, built from two adjacent string
 * literals that the compiler concatenates into one.
 *
 * The first literal must be closed before the line continuation.  Without
 * the closing quote the literal ends at the opening quote of the next line,
 * leaving the escaped URL outside any string, so the macro fails to compile
 * wherever it is expanded.
 *
 * NOTE(review): the text has no closing '>' and the macro is not referenced
 * in the visible source; confirm the intended value before using it. */
#define MODXMLENT_CONFIG_STRIP_DOCTYPE \
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " \
    "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\""
/* Forward declaration of the module record.  It lets code that appears
 * before the definition take its address, as xmlEntHandler does when it
 * calls ap_get_module_config. */
33 module AP_MODULE_DECLARE_DATA xmlent_module;
/* Fields of the per-filter context, accessed through xmlEntContext pointers.
 * NOTE(review): the enclosing struct declaration is not visible in this excerpt. */
37 apr_bucket_brigade* brigade; /* brigade that _fwrite() writes generated output into; handed on with ap_pass_brigade() */
38 XML_Parser parser; /* our XML parser; xmlEntHandler copies the file-level parser into this field */
/* Fields of the per-directory configuration, accessed through xmlEntConfig pointers.
 * NOTE(review): the enclosing struct declaration is not visible in this excerpt. */
43 int stripComments; /* non-zero when comments should be stripped on the way out */
44 int stripPI; /* non-zero when processing instructions should be stripped on the way out */
45 char* contentType; /* the content type used to serve pages */
46 char* doctype; /* the doctype header to send before any other data; NULL when none is configured */
50 /* Directive handler for XMLEntContentType: records the directive argument
 * as the content type in the per-directory config.
 *   params - command parameters; not referenced in the visible lines
 *   cfg    - the xmlEntConfig being populated
 *   arg    - the directive argument; the pointer itself is stored, no copy is made
 * NOTE(review): the return statement is not visible in this excerpt. */
51 static const char* xmlEntSetContentType(cmd_parms *params, void *cfg, const char *arg) {
52 xmlEntConfig* config = (xmlEntConfig*) cfg;
53 config->contentType = (char*) arg;
57 /* Directive handler for XMLEntStripPI: gets the strip PI flag from the config.
 * The flag becomes 1 when the argument equals "yes" (compared without regard
 * to case) and 0 for any other value, including a NULL argument.
 *   params - command parameters; not referenced in the visible lines
 *   cfg    - the xmlEntConfig being populated
 *   arg    - the directive argument
 * NOTE(review): the return statement is not visible in this excerpt. */
58 static const char* xmlEntSetStripPI(cmd_parms *params, void *cfg, const char *arg) {
59 xmlEntConfig* config = (xmlEntConfig*) cfg;
60 char* a = (char*) arg;
61 config->stripPI = (a && !strcasecmp(a, "yes")) ? 1 : 0;
65 /* Directive handler for XMLEntStripComments: gets the strip comments flag
 * from the config.  The flag becomes 1 when the argument equals "yes"
 * (compared without regard to case) and 0 for any other value, including a
 * NULL argument.
 *   params - command parameters; not referenced in the visible lines
 *   cfg    - the xmlEntConfig being populated
 *   arg    - the directive argument
 * NOTE(review): the return statement is not visible in this excerpt. */
66 static const char* xmlEntSetStripComments(cmd_parms *params, void *cfg, const char *arg) {
67 xmlEntConfig* config = (xmlEntConfig*) cfg;
68 char* a = (char*) arg;
69 config->stripComments = (a && !strcasecmp(a, "yes")) ? 1 : 0;
73 /* Directive handler for XMLEntDoctype: gets the user defined doctype from
 * the config and stores it in the per-directory config.
 *   params - command parameters; not referenced in the visible lines
 *   cfg    - the xmlEntConfig being populated
 *   arg    - the directive argument; the pointer itself is stored, no copy is made
 * NOTE(review): the return statement is not visible in this excerpt. */
74 static const char* xmlEntSetDoctype(cmd_parms *params, void *cfg, const char *arg) {
75 xmlEntConfig* config = (xmlEntConfig*) cfg;
76 config->doctype = (char*) arg;
80 /* Tell apache how to set our config variables.
 * Each entry pairs a directive name with the handler that stores its value.
 * All four are declared with AP_INIT_TAKE1 and ACCESS_CONF, and pass NULL as
 * the handler data.
 * NOTE(review): the terminating entry and closing brace of the table are not
 * visible in this excerpt. */
81 static const command_rec xmlEntCommands[] = {
82 AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_COMMENTS,
83 xmlEntSetStripComments, NULL, ACCESS_CONF, "XMLENT Strip Comments"),
84 AP_INIT_TAKE1( MODXMLENT_CONFIG_CONTENT_TYPE,
85 xmlEntSetContentType, NULL, ACCESS_CONF, "XMLENT Content Type"),
86 AP_INIT_TAKE1( MODXMLENT_CONFIG_STRIP_PI,
87 xmlEntSetStripPI, NULL, ACCESS_CONF, "XMLENT Strip XML Processing Instructions"),
88 AP_INIT_TAKE1( MODXMLENT_CONFIG_DOCTYPE,
89 xmlEntSetDoctype, NULL, ACCESS_CONF, "XMLENT Doctype Declaration"),
93 /* Creates a new per-directory config object filled in with the module defaults.
 *   p   - pool the config is allocated from
 *   dir - not referenced in the visible lines
 * Returns the new xmlEntConfig as a void pointer. */
94 static void* xmlEntCreateDirConfig( apr_pool_t* p, char* dir ) {
95 xmlEntConfig* config =
96 (xmlEntConfig*) apr_palloc( p, sizeof(xmlEntConfig) );
/* comment stripping defaults to on only when the default string is "yes" */
97 config->stripComments =
98 (MODXMLENT_CONFIG_STRIP_COMMENTS_DEFAULT &&
99 !strcasecmp(MODXMLENT_CONFIG_STRIP_COMMENTS_DEFAULT, "yes")) ? 1 : 0;
/* NOTE(review): the assignment target for the next expression is not visible
 * in this excerpt; presumably config->stripPI -- confirm against the full file */
101 (MODXMLENT_CONFIG_STRIP_PI_DEFAULT &&
102 !strcasecmp(MODXMLENT_CONFIG_STRIP_PI_DEFAULT, "yes")) ? 1 : 0;
103 config->contentType = MODXMLENT_CONFIG_CONTENT_TYPE_DEFAULT;
104 config->doctype = NULL; /* xmlEntHandler writes a doctype only when this is non-NULL */
105 return (void*) config;
108 /* keep for a while in case we ever need it.
 * NOTE(review): the module record passes NULL as the dir merger, so this
 * function is not installed by the visible code; it may be disabled by lines
 * that are not visible in this excerpt. */
/* Yields field f of the child config c when it is non-zero / non-NULL,
 * otherwise the same field of the parent config p.
 * NOTE(review): the expansion ends in a semicolon, so each use below leaves
 * an extra empty statement, and the arguments are not parenthesized. */
110 #define XMLENT_INHERIT(p, c, f) ((c->f) ? c->f : p->f);
/* Builds a merged config in pool p from a parent (base) and a child (overrides).
 * Only contentType and stripComments are merged in the visible lines.
 * NOTE(review): a child stripComments of 0 is treated the same as unset, so
 * the parent value is used in that case -- confirm this is intended. */
111 static void* xmlEntMergeDirConfig(apr_pool_t *p, void *base, void *overrides) {
112 xmlEntConfig* parent = base;
113 xmlEntConfig* child = overrides;
114 xmlEntConfig* newConf = (xmlEntConfig*) apr_pcalloc(p, sizeof(xmlEntConfig));
115 newConf->contentType = XMLENT_INHERIT(parent, child, contentType);
116 newConf->stripComments = XMLENT_INHERIT(parent, child, stripComments);
122 /* We need a global parser object because sub-requests, with different
123 * filter contexts, are parsing part of the same document.
124 * This means that this filter will only work in forked (non-threaded) environments.
125 * XXX Figure out how to share pointers/data across filters.
 * NULL means no parser exists yet; xmlEntHandler creates one on demand. */
126 XML_Parser parser = NULL;
128 /* Utility function which passes data to the next filter.
 *   filter - the filter whose context brigade receives the output
 *   data   - handed to VA_LIST_TO_STRING together with the variadic
 *            arguments; callers in this file pass printf-style formats
 * Does nothing when either filter or data is NULL.
 * NOTE(review): VA_LIST_TO_STRING and VA_BUF are not defined in this file;
 * presumably the macro formats data into VA_BUF -- confirm in opensrf/utils.h.
 * NOTE(review): filter->ctx is dereferenced without a NULL check. */
129 static void _fwrite( ap_filter_t* filter, char* data, ... ) {
130 if(!(filter && data)) return;
131 xmlEntContext* ctx = (xmlEntContext*) filter->ctx;
132 VA_LIST_TO_STRING(data);
133 ap_fwrite( filter->next, ctx->brigade, VA_BUF, strlen(VA_BUF));
137 /* Cycles through the attributes attached to an element and writes each one
 * as name='value', HTML-escaping the value first.
 *   filter - the filter to write through
 *   atts   - NULL-terminated array read as name, value at consecutive indexes
 * NOTE(review): the declaration of i and the end of the loop body are not
 * visible in this excerpt.  The for header advances i by one only, so unless
 * the hidden lines add another increment each value would be read again as
 * the next name -- confirm.
 * NOTE(review): values are emitted inside single quotes; confirm that
 * ap_escape_html escapes a single quote, otherwise a value containing one
 * ends the attribute early. */
138 static void printAttr( ap_filter_t* filter, const char** atts ) {
141 for( i = 0; atts[i] && atts[i+1]; i++ ) {
142 const char* name = atts[i];
143 const char* value = atts[i+1];
144 char* escaped = ap_escape_html(filter->r->pool, value);
145 _fwrite( filter, " %s='%s'", name, escaped );
150 /* Starts an XML element: writes the opening tag, its attributes, and the
 * closing bracket followed by a newline.
 *   userData - the ap_filter_t registered with XML_SetUserData
 *   name     - element name
 *   atts     - attribute array, forwarded to printAttr */
151 static void XMLCALL startElement(void *userData, const char *name, const char **atts) {
152 ap_filter_t* filter = (ap_filter_t*) userData;
153 _fwrite(filter, "<%s", name );
154 printAttr( filter, atts );
/* NOTE(review): this format has no conversion, so the name argument is unused */
155 _fwrite(filter, ">\n", name );
158 /* Handles the character data: copies len bytes of s into a local buffer,
 * HTML-escapes the copy and writes it through the filter.
 *   userData - the ap_filter_t registered with XML_SetUserData
 *   s        - character data; only the first len bytes are read
 *   len      - number of bytes available in s
 * NOTE(review): the declaration and termination of the data buffer are not
 * visible in this excerpt.
 * NOTE(review): the escaped text is passed to _fwrite as its format argument,
 * so a percent sign in the document text would be read as a conversion;
 * passing "%s" with escaped as the argument would avoid that -- confirm how
 * VA_LIST_TO_STRING treats its argument. */
159 static void XMLCALL charHandler( void* userData, const XML_Char* s, int len ) {
160 ap_filter_t* filter = (ap_filter_t*) userData;
163 memcpy( data, s, len );
164 char* escaped = ap_escape_html(filter->r->pool, data);
165 _fwrite( filter, escaped );
/* Handles a processing instruction by writing it back out in the form
 * <?target data?>.  Neither target nor data is escaped.
 *   userData - the ap_filter_t registered with XML_SetUserData
 *   target   - processing instruction target
 *   data     - processing instruction data */
168 static void XMLCALL handlePI( void* userData, const XML_Char* target, const XML_Char* data) {
169 ap_filter_t* filter = (ap_filter_t*) userData;
170 _fwrite(filter, "<?%s %s?>", target, data);
173 /* Ends an XML element: writes the closing tag followed by a newline.
 *   userData - the ap_filter_t registered with XML_SetUserData
 *   name     - element name */
174 static void XMLCALL endElement(void *userData, const char *name) {
175 ap_filter_t* filter = (ap_filter_t*) userData;
176 _fwrite( filter, "</%s>\n", name );
180 /* The handler. Creates a new parser and/or filter context where appropriate
181 * and parses the chunks of data received from the brigade. */
/* Output filter entry point.
 *   f       - this filter instance; f->ctx holds the xmlEntContext once created
 *   brigade - incoming data; buckets are consumed one at a time
 * Returns HTTP_INTERNAL_SERVER_ERROR when the XML parser reports an error.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, including the declarations of data, len and firstrun, the closing
 * braces, and the normal return path, so the block structure suggested by
 * the comments below should be confirmed against the full file. */
183 static int xmlEntHandler( ap_filter_t *f, apr_bucket_brigade *brigade ) {
185 xmlEntContext* ctx = f->ctx;
186 apr_bucket* currentBucket = NULL;
187 apr_pool_t* pool = f->r->pool;
191 /* load the per-dir/location config */
192 xmlEntConfig* config = ap_get_module_config(
193 f->r->per_dir_config, &xmlent_module );
195 ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
196 0, f->r, "XMLENT Content Type = %s", config->contentType);
198 /* set the content type based on the config */
199 ap_set_content_type(f->r, config->contentType);
202 /* create the XML parser; the file-level parser is reused when one already exists */
204 if( parser == NULL ) {
206 parser = XML_ParserCreate("UTF-8");
/* the filter becomes the userData argument of every parser callback.
 * NOTE(review): this is set only when the parser is created, so callbacks
 * keep writing through the filter that created it -- confirm that is the
 * intent when another filter instance reuses the parser */
207 XML_SetUserData(parser, f);
208 XML_SetElementHandler(parser, startElement, endElement);
209 XML_SetCharacterDataHandler(parser, charHandler);
211 XML_SetProcessingInstructionHandler(parser, handlePI);
214 /* create the filter context: zeroed, allocated from the request pool */
216 f->ctx = ctx = apr_pcalloc( pool, sizeof(*ctx));
217 ctx->brigade = apr_brigade_create( pool, f->c->bucket_alloc );
218 ctx->parser = parser;
222 if(firstrun) { /* we haven't started writing the data to the stream yet */
224 /* go ahead and write the doctype out if we have one defined */
225 if(config->doctype) {
226 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
227 0, f->r, "XMLENT DOCTYPE => %s", config->doctype);
228 _fwrite(f, "%s\n\n", config->doctype);
233 /* cycle through the buckets in the brigade */
234 while (!APR_BRIGADE_EMPTY(brigade)) {
236 /* grab the next bucket */
237 currentBucket = APR_BRIGADE_FIRST(brigade);
239 /* clean up when we're done: move the EOS or FLUSH bucket onto our output
 * brigade, pass that brigade downstream, and free the parser.
 * NOTE(review): the parser is freed for FLUSH as well as EOS, and the
 * result of ap_pass_brigade is not checked in the visible lines; whether the
 * file-level parser is reset to NULL afterwards is not visible -- confirm */
240 if (APR_BUCKET_IS_EOS(currentBucket) || APR_BUCKET_IS_FLUSH(currentBucket)) {
241 APR_BUCKET_REMOVE(currentBucket);
242 APR_BRIGADE_INSERT_TAIL(ctx->brigade, currentBucket);
243 ap_pass_brigade(f->next, ctx->brigade);
244 XML_ParserFree(parser);
249 /* read the incoming data.
 * NOTE(review): the read is non-blocking and any status other than
 * APR_SUCCESS is logged as an error; confirm how a would-block status is
 * meant to be handled */
250 int s = apr_bucket_read(currentBucket, &data, &len, APR_NONBLOCK_READ);
251 if( s != APR_SUCCESS ) {
252 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r,
253 "XMLENT error reading data from filter with status %d", s);
259 ap_log_rerror( APLOG_MARK, APLOG_DEBUG,
260 0, f->r, "XMLENT read %d bytes", (int)len);
262 /* push data into the XML push parser; the final-chunk flag is always 0 here */
263 if ( XML_Parse(ctx->parser, data, len, 0) == XML_STATUS_ERROR ) {
265 /* log and die on XML errors.
 * NOTE(review): the line number is logged with %d; confirm the return type
 * of XML_GetCurrentLineNumber matches int on the target platform */
266 ap_log_rerror( APLOG_MARK, APLOG_ERR, 0, f->r, "XMLENT XML Parse Error: %s at line %d\n",
267 XML_ErrorString(XML_GetErrorCode(ctx->parser)),
268 XML_GetCurrentLineNumber(ctx->parser));
270 XML_ParserFree(parser);
272 return HTTP_INTERNAL_SERVER_ERROR;
276 /* so a subrequest doesn't re-read this bucket */
277 apr_bucket_delete(currentBucket);
280 apr_brigade_destroy(brigade);
285 /* Register the filter function as a filter for modifying the HTTP body (content).
 * The filter is registered under the name "XMLENT" with xmlEntHandler as its
 * function, no init function (NULL), and type AP_FTYPE_CONTENT_SET.
 *   pool - not referenced in the visible lines */
286 static void xmlEntRegisterHook(apr_pool_t *pool) {
287 ap_register_output_filter("XMLENT", xmlEntHandler, NULL, AP_FTYPE_CONTENT_SET);
290 /* Define the module data: per-directory config creation, the directive
 * table and the hook registration function.  No dir merger and no server
 * config functions are supplied. */
291 module AP_MODULE_DECLARE_DATA xmlent_module = {
292 STANDARD20_MODULE_STUFF,
293 xmlEntCreateDirConfig, /* dir config creator */
294 NULL, /* dir merger --- default is to override */
295 NULL, /* server config */
296 NULL, /* merge server config */
297 xmlEntCommands, /* command apr_table_t */
298 xmlEntRegisterHook /* register hook */