3 @brief Utility routines for XML documents.
6 #include <opensrf/xml_utils.h>
9 static void _xmlToJSON(xmlNodePtr node, jsonObject*);
12 @brief Write the contents of an xmlNode to standard output.
13 @param node Pointer to an xmlNode.
15 Write the text content of an xmlNode, and all its dependent nodes recursively, to
18 Warning: the output is pig-ugly, in part because whenever the child node is a tag
19 (rather than text), the content member is a NULL pointer.
21 Designed for debugging.
23 void recurse_doc( xmlNodePtr node ) {
24 if( node == NULL ) return;
25 printf("Recurse: %s => %s", node->name,
26 node->content ? (const char*) node->content : "(null)" );
27 xmlNodePtr t = node->children;
35 @brief Translate an XML document into a jsonObject.
36 @param doc An xmlDocPtr representing the XML document.
37 @return A pointer to the newly created jsonObject.
39 The translation pays attention only to tags and enclosed text. It ignores attributes,
40 comments, processing directives, and XML declarations.
42 The document as a whole is represented as a JSON_HASH with one member, whose key is the
45 Every tag is represented as the key of a member in a JSON_HASH, whose corresponding value
46 depends on what the element encloses:
48 - If the element is empty, its value is a JSON_NULL.
49 - If the element encloses only text, its value is a JSON_STRING containing the enclosed
50 text. Special characters and UTF-8 characters are escaped according to JSON rules;
51 otherwise, white space is preserved intact.
52 - If the element encloses one or more nested elements, its value is a JSON_HASH
53 whose members represent the enclosed elements, except that:
54 - If there are two or more elements with the same tag in the same enclosing element,
55 they collapse into a single entry whose value is a JSON_ARRAY of the corresponding values.
57 The calling code is responsible for freeing the jsonObject by calling jsonObjectFree().
59 jsonObject* xmlDocToJSON(xmlDocPtr doc) {
62 xmlNodePtr root = xmlDocGetRootElement( doc );
63 if( !root || xmlIsBlankNode( root ) )
66 jsonObject* obj = jsonNewObjectType( JSON_HASH );
67 _xmlToJSON( root, obj );
72 @brief Translate an xmlNodePtr into a jsonObject.
73 @param node Points to the XML node to be translated.
74 @param obj Pointer to an existing jsonObject into which the new jsonObject will be inserted.
76 See the description of xmlDocToJSON(), a thin wrapper for _xmlToJSON.
78 static void _xmlToJSON(xmlNodePtr node, jsonObject* obj) {
80 if( !node || !obj ) return;
81 if(xmlIsBlankNode(node)) return;
83 if(node->type == XML_TEXT_NODE) {
84 jsonObjectSetString(obj, (char*) node->content);
86 } else if(node->type == XML_ELEMENT_NODE || node->type == XML_ATTRIBUTE_NODE ) {
88 jsonObject* new_obj = jsonNewObject(NULL);
90 jsonObject* old = jsonObjectGetKey(obj, (char*) node->name);
92 // We have already encountered an element with the same tag
93 if( old->type == JSON_ARRAY ) {
94 // Add the new value to an existing JSON_ARRAY
95 jsonObjectPush(old, new_obj);
97 // Replace the earlier value with a JSON_ARRAY containing both values
98 jsonObject* arr = jsonNewObjectType( JSON_ARRAY );
99 jsonObjectPush( arr, jsonObjectClone(old) );
100 jsonObjectPush( arr, new_obj );
101 jsonObjectSetKey( obj, (char*) node->name, arr );
104 jsonObjectSetKey(obj, (char*) node->name, new_obj);
107 xmlNodePtr child = node->children;
108 if (child) { // at least one...
109 if (child != node->last) { // more than one -- ignore TEXT nodes
111 if( child->type != XML_TEXT_NODE )
112 _xmlToJSON( child, new_obj );
116 _xmlToJSON( child, new_obj );
123 @brief Translate an xmlDocPtr to a character string.
124 @param doc An xmlDocPtr referencing an XML document.
125 @param full Boolean; controls whether the output includes material outside the root.
126 @return Pointer to the generated string.
128 If @a full is true, the output includes any material outside of the root element, such
129 as processing directives, comments, and XML declarations. Otherwise it excludes them.
131 The calling code is responsible for freeing the string by calling free().
133 char* xmlDocToString(xmlDocPtr doc, int full) {
135 if(!doc) return NULL;
143 xmlDocDumpMemory(doc, &xmlbuf, &size);
144 xml = strdup((char*) (xmlbuf));
150 xmlBufferPtr xmlbuf = xmlBufferCreate();
151 xmlNodeDump( xmlbuf, doc, xmlDocGetRootElement(doc), 0, 0);
152 xml = strdup((char*) (xmlBufferContent(xmlbuf)));
153 xmlBufferFree(xmlbuf);
160 @brief Search for the value of a given attribute in an attribute array.
161 @param atts Pointer to the attribute array to be searched.
162 @param name Pointer to the attribute name to be sought.
163 @return A pointer to the attribute value if found, or NULL if not.
165 The @a atts parameter points to a ragged array of strings. The @a atts[0] pointer points
166 to an attribute name, and @a atts[1] points to the corresponding attribute value. The
167 remaining pointers likewise point alternately to names and values. The end of the
168 list is marked by a NULL.
170 In practice, the XML parser constructs the @a atts array and passes it to a callback
173 const char* xmlSaxAttr( const xmlChar** atts, const char* name ) {
176 for(i = 0; (atts[i] != NULL); i++) {
177 if(!strcmp((char*) atts[i], name)) {
179 return (const char*) atts[i];
187 @brief Add a series of attributes to an xmlNode.
188 @param node Pointer to the xmlNode to which the attributes will be added.
189 @param atts Pointer to the attributes to be added.
190 @return Zero in all cases.
192 The @a atts parameter points to a ragged array of strings. The @a atts[0] pointer points
193 to an attribute name, and @a atts[1] points to the corresponding attribute value. The
194 remaining pointers likewise point alternately to names and values. The end of the
195 list is marked by a NULL.
197 In practice, the XML parser constructs the @a atts array and passes it to a callback
200 int xmlAddAttrs( xmlNodePtr node, const xmlChar** atts ) {
203 for(i = 0; (atts[i] != NULL); i++) {
205 xmlSetProp(node, atts[i], atts[i+1]);