3 from xml.sax import handler, make_parser, saxutils
def xml_file_to_object(filename):
    """Turns the contents of an XML file into a Python object.

    The file is parsed with xml.dom.minidom and its document element is
    converted by xml_node_to_object().

    filename -- path (or file object) accepted by xml.dom.minidom.parse()

    Returns whatever xml_node_to_object() produces for the root element.
    Parse errors raised by minidom propagate to the caller.
    """
    doc = xml.dom.minidom.parse(filename)
    try:
        # The converted object only holds plain strings/dicts/lists, so it
        # stays valid after the DOM tree is torn down below.
        return xml_node_to_object(doc.documentElement)
    finally:
        # Break the DOM's internal reference cycles so it is freed promptly.
        doc.unlink()
def xml_string_to_object(string):
    """Turns an XML string into a Python object.

    The string is parsed with xml.dom.minidom and its document element is
    converted by xml_node_to_object().

    string -- XML document text accepted by xml.dom.minidom.parseString()

    Returns whatever xml_node_to_object() produces for the root element.
    Parse errors raised by minidom propagate to the caller.
    """
    doc = xml.dom.minidom.parseString(string)
    try:
        # The converted object only holds plain strings/dicts/lists, so it
        # stays valid after the DOM tree is torn down below.
        return xml_node_to_object(doc.documentElement)
    finally:
        # Break the DOM's internal reference cycles so it is freed promptly.
        doc.unlink()
def xml_node_to_object(xml_node):
    """Turns an XML node into a Python object.

    For an element node the result is a dict with at most one key, the
    element's node name:

      * element children and attributes are collected (via
        __append_child_node) into a sub-dict keyed by child/attribute name;
        repeated child names become lists
      * an element with no element children uses the stripped value of its
        first child node as its data -- stored directly under the node name,
        or under '#text' when the element also has attributes
      * an element with no children and no attributes yields an empty dict

    Returns None when xml_node is not an element node.
    """
    if xml_node.nodeType != xml_node.ELEMENT_NODE:
        return None

    obj = {}
    node_name = xml_node.nodeName
    has_element_children = False

    for node_child in xml_node.childNodes:
        if node_child.nodeType == xml_node.ELEMENT_NODE:
            sub_obj = xml_node_to_object(node_child)
            __append_child_node(obj, node_name, node_child.nodeName, sub_obj)
            has_element_children = True

    for attr in xml_node.attributes.values():
        __append_child_node(obj, node_name, attr.name,
            {attr.name: attr.value})

    if not has_element_children and xml_node.childNodes:
        # If the node has no element children, clean up the text
        # content and use that as the data
        text_node = xml_node.childNodes[0] # extract the text node

        # str.replace() treats its argument literally, so the former
        # replace('^\s*', '') / replace('\s*$', '') calls never trimmed
        # anything; strip() performs the intended whitespace clean-up.
        data = unicode(text_node.nodeValue).strip()

        if node_name in obj:
            # the current element contains attributes and text
            obj[node_name]['#text'] = data
        else:
            # the current element contains text only
            obj[node_name] = data

    return obj
def __append_child_node(obj, node_name, child_name, sub_obj):
    """ If a node has element children, create a new sub-object
        for this node, attach an array for each type of child
        and recursively collect the children data into the array(s)

        obj        -- dict being built for the parent node; modified in place
        node_name  -- name of the parent node (key into obj)
        child_name -- name of the child element or attribute being added
        sub_obj    -- dict holding the child's data under child_name; when
                      the key is absent (e.g. an empty child element) the
                      child's value is recorded as None

        The first value seen for child_name is stored as-is; a second value
        turns the entry into a two-item list and later values are appended.
    """

    # Missing data is treated as None on every occurrence, not just the
    # first one, so repeated empty children no longer raise KeyError.
    value = sub_obj.get(child_name)

    children = obj.setdefault(node_name, {})

    if child_name not in children:
        # we've encountered 1 sub-node with node_child's name
        children[child_name] = value

    elif isinstance(children[child_name], list):
        # we already have multiple sub-nodes with node_child's name
        children[child_name].append(value)

    else:
        # we already have 1 sub-node with node_child's name, make
        # it a list and append the current node
        children[child_name] = [children[child_name], value]
class XMLFlattener(handler.ContentHandler):
    ''' Turns an XML string into a flattened dictionary of properties.

        Example <doc><a><b>text1</b></a><c>text2</c><c>text3</c></doc> becomes
        {
            'doc.a.b' : 'text1',
            'doc.c' : ['text2', 'text3']
        }

        Keys are the names of the open elements joined with '.'.  When
        encode_as_json is true, every stored value is a JSON string
        produced by osrf.json.to_json instead of the raw value.
    '''

    # Matches text that is empty or consists only of whitespace.
    reg = re.compile(r'^\s*$')

    class Handler(handler.ContentHandler):
        ''' SAX content handler that accumulates the flattened dictionary
            in self.result while tracking the open elements in
            self.elements. '''

        def __init__(self):
            handler.ContentHandler.__init__(self)
            self.result = {}
            self.elements = []
            self.use_json = False

        def startElement(self, name, attrs):
            ''' Pushes the element name onto the path; attributes are
                ignored. '''
            self.elements.append(name)

        def characters(self, chars):
            ''' Stores URL-unquoted character data under the current
                dotted path.  Whitespace-only text is skipped.  A second
                piece of text for the same path turns the entry into a
                list, and later pieces are appended to it. '''
            # NOTE: every non-blank call adds an entry, so text that the
            # parser delivers in several chunks ends up as several items.
            text = urllib.unquote_plus(chars)
            if XMLFlattener.reg.match(text):
                return

            key = '.'.join(self.elements)

            if key in self.result:
                data = self._decode(self.result[key])
                if isinstance(data, list):
                    data.append(text)
                else:
                    data = [data, text]
                self.result[key] = self._encode(data)
            else:
                self.result[key] = self._encode(text)

        def endElement(self, name):
            ''' Pops the element that just closed off the path. '''
            self.elements.pop()

        def _decode(self, string):
            ''' Returns the stored value as a Python object, decoding it
                from JSON when use_json is set. '''
            if self.use_json:
                return osrf.json.to_object(string)
            return string

        def _encode(self, obj):
            ''' Returns the value to store, encoding it as JSON when
                use_json is set. '''
            if self.use_json:
                return osrf.json.to_json(obj)
            return obj

    def __init__(self, xml_str, encode_as_json=False):
        ''' xml_str        -- the XML document text to flatten
            encode_as_json -- store values as JSON strings when true '''
        self.xml_str = xml_str
        self.use_json = encode_as_json

    def parse(self):
        ''' Parses the XML string and returns the dict of keys/values.

            Parse failures are logged through osrf.log.log_error and then
            re-raised to the caller. '''
        sax_handler = XMLFlattener.Handler()
        sax_handler.use_json = self.use_json
        parser = make_parser()
        parser.setContentHandler(sax_handler)

        try:
            parser.parse(StringIO.StringIO(self.xml_str))
        except Exception as e:
            # NOTE(review): relies on osrf.log having been imported
            # somewhere so that it is reachable as an attribute of the
            # osrf package -- confirm.
            osrf.log.log_error('Error parsing XML: %s' % unicode(e))
            # bare raise keeps the original traceback intact
            raise

        return sax_handler.result