Friday, May 07, 2010

Python minidom

import xml.dom.minidom

# Sample XML document used as the demo input: a <bookstore> root element
# holding three <book> elements, each with a "category" attribute and
# <title>, <author>, <year> and <price> children.
test = """
<bookstore>
<book category="COOKING">
  <title lang="en">Everyday Italian</title>
  <author>Giada De Laurentiis</author>
  <year>2005</year>
  <price>30.00</price>
</book>
<book category="CHILDREN">
  <title lang="en">Harry Potter</title>
  <author>J K. Rowling</author>
  <year>2005</year>
  <price>29.99</price>
</book>
<book category="WEB">
  <title lang="en">Learning XML</title>
  <author>Erik T. Ray</author>
  <year>2003</year>
  <price>39.95</price>
</book>
</bookstore>
"""

def getTagText(root, tag):
    """Return the text directly inside the first ``tag`` element under ``root``.

    The first element returned by ``root.getElementsByTagName(tag)`` is
    selected, and the ``data`` of its immediate text and CDATA children is
    concatenated in document order.  Text inside nested elements is not
    included.

    Raises:
        IndexError: if no element named ``tag`` exists below ``root``.
    """
    element = root.getElementsByTagName(tag)[0]
    pieces = [
        child.data
        for child in element.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    ]
    return "".join(pieces)

def _ownText(node):
    """Return the stripped text held directly by ``node``.

    Only the node's immediate text and CDATA children are concatenated;
    text that belongs to nested elements is not included.
    """
    pieces = [
        child.data
        for child in node.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    ]
    return "".join(pieces).strip()


def toStr(root, num):
    """Print the element tree below ``root`` as an indented outline.

    Each child element is printed on its own line as ``|-name`` or, when it
    directly contains non-whitespace text, ``|-name (text)``.  Every level
    of nesting adds one ``"|    "`` prefix.

    Args:
        root: DOM node whose child elements are printed recursively.
        num: current nesting depth; when it is 0 the name of ``root``
            itself is printed first.
    """
    if num == 0:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(root.nodeName)
    prefix = "|    " * num + "|-"
    for node in root.childNodes:
        if node.nodeType != node.ELEMENT_NODE:
            continue
        # Read the text from this very node.  Looking it up by tag name
        # under ``root`` would return the first matching descendant, which
        # is wrong whenever several elements share a tag name.
        value = _ownText(node)
        # Strip before testing so elements that hold only layout
        # whitespace are shown without an empty "()".
        if value:
            print(prefix + node.nodeName + " (" + value + ")")
        else:
            print(prefix + node.nodeName)
        toStr(node, num + 1)

# Parse the sample XML (leading/trailing whitespace stripped first) into a
# DOM document.
dom = xml.dom.minidom.parseString(test.strip())
# The document's root element.
root = dom.documentElement

# Print the element tree; depth 0 makes toStr print the root's name first.
toStr(root,0) 

Add a reaction

About You

Comment