# XML flattener for osrf/xml_obj.py, similar to the Java XML flattener.

import re
from xml.sax import handler, make_parser

try:  # Python 2
    from urllib import unquote_plus as _unquote_plus
except ImportError:  # Python 3
    from urllib.parse import unquote_plus as _unquote_plus

try:  # Python 2
    _text_type = unicode
except NameError:  # Python 3
    _text_type = str


def _xml_stream(xml_str):
    """Wrap an XML string in a file-like object the SAX parser can read."""
    try:
        from StringIO import StringIO  # Python 2: accepts str and unicode
    except ImportError:
        import io
        if isinstance(xml_str, bytes):
            return io.BytesIO(xml_str)
        return io.StringIO(xml_str)
    return StringIO(xml_str)


class XMLFlattener(handler.ContentHandler):
    """Turns an XML string into a flattened dictionary of properties.

    Each key is the dot-joined path of element names leading to a piece of
    text.  A path that occurs once maps to a string; a path that occurs
    several times maps to a list of strings in document order.

    Example:
        <doc><a><b>text1</b></a><c>text2</c><c>text3</c></doc>
    becomes
        {
            'doc.a.b' : 'text1',
            'doc.c' : ['text2', 'text3']
        }

    Text is passed through unquote_plus() (so '+' becomes a space and
    '%XX' escapes are decoded) and whitespace-only text is ignored.
    """

    # Matches text that is empty or consists solely of whitespace.
    reg = re.compile(r'^\s*$')

    class Handler(handler.ContentHandler):
        """SAX content handler that accumulates the flattened result."""

        def __init__(self):
            handler.ContentHandler.__init__(self)
            self.result = {}      # flattened path -> text or list of text
            self.elements = []    # stack of currently open element names
            self._chunks = []     # text pieces seen since the last tag

        def startElement(self, name, attrs):
            # Text seen before a child element belongs to the parent path.
            self._flush_text()
            self.elements.append(name)

        def characters(self, chars):
            # SAX parsers may deliver one run of text in several calls
            # (expat splits at line endings and entity references), so
            # only buffer here and store the value at the next tag.
            self._chunks.append(chars)

        def endElement(self, name):
            self._flush_text()
            self.elements.pop()

        def _flush_text(self):
            """Store the buffered text under the current element path."""
            if not self._chunks:
                return
            text = _unquote_plus(''.join(self._chunks))
            self._chunks = []
            if XMLFlattener.reg.match(text):
                return

            key = '.'.join(self.elements)
            if key not in self.result:
                self.result[key] = text
                return

            existing = self.result[key]
            if isinstance(existing, list):
                existing.append(text)
            else:
                # Second value for this path: promote the entry to a list.
                self.result[key] = [existing, text]

    def __init__(self, xml_str):
        """Remember the XML string to be flattened by parse()."""
        handler.ContentHandler.__init__(self)
        self.xml_str = xml_str

    def parse(self):
        """Parses the XML string and returns the dict of keys/values.

        Any exception raised while parsing is logged through
        osrf.log.log_error and then re-raised unchanged.
        """
        sax_handler = XMLFlattener.Handler()
        parser = make_parser()
        parser.setContentHandler(sax_handler)
        try:
            parser.parse(_xml_stream(self.xml_str))
        except Exception as exc:
            # Imported here so the logger is guaranteed to be loaded;
            # the file's top-level imports only pull in osrf.json.
            import osrf.log
            osrf.log.log_error('Error parsing XML: %s' % _text_type(exc))
            raise  # bare raise keeps the original traceback

        return sax_handler.result