Python 与 lxml 应用

首先下载lxml， http://www.lfd.uci.edu/~gohlke/pythonlibs/ ，然后加入引用

from lxml import _elementpath as DONTUSE

from lxml import etree

详细演示样例：

1.加入命名空间

# Set the namespace: map the "xsi" prefix to the XML Schema instance
# namespace URI and create the "DcmStatistics" root element with that
# prefix mapping.

nsmap = {"xsi": "http://www.w3.org/2001/XMLSchema-instance" }

g_statisticsRoot = etree.Element("DcmStatistics", nsmap = nsmap)

2.加入xml schema引用

# Add the XSD reference: set the noNamespaceSchemaLocation attribute
# (qualified with the XMLSchema-instance namespace in "{uri}name" form)
# on the root element, pointing at "DcmStatistics.xsd".

g_statisticsRoot.set("{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation", "DcmStatistics.xsd" )

3.加入注释

# Add a comment: create a comment node and use addprevious() to insert it
# in front of the root node.

comment = etree.Comment("create by jiangong.li")

g_statisticsRoot.addprevious(comment)

4.尝试多种编码来解析xml

def decodingXml(xmlFile):
    """Parse ``xmlFile``, trying UTF-8 first and GB18030 as a fallback.

    A fresh parser is built for each attempt with blank text removed and
    comments preserved.

    NOTE(review): if ``xmlFile`` is an already-open file object rather than
    a path, the second attempt continues from wherever the first one stopped
    reading -- confirm that callers pass a path.

    Args:
        xmlFile: The XML source handed to ``etree.parse``.

    Returns:
        The parsed tree, or ``None`` when parsing fails with both encodings
        (an error message is printed in that case).
    """
    for encoding in ("utf-8", "gb18030"):
        parser = etree.XMLParser(
            remove_blank_text=True, encoding=encoding, remove_comments=False
        )
        try:
            return etree.parse(xmlFile, parser)
        except (etree.LxmlError, OSError):
            # This encoding did not work; move on to the next candidate.
            continue

    print(" PAR XML ERROR, decoding error.")
    return None

5.遍历xml下的全部子节点，不止直属第一级子节点. iter()

# Walk every element yielded by root.iter() (not only the first-level
# children) and drop the text that follows each element's closing tag.
for element in root.iter():
    element.tail = None

6.遍历xml下的第一级子节点. iterchildren()

# Display names for the child tags this loop knows how to process.
_NODE_DISPLAY_NAMES = {
    "element": "Element",
    "sequence": "Sequence",
    "item": "Item",
}

# iterchildren() yields only the direct children of srcParentNode, so the
# parent itself never shows up and needs no special-casing.
for e in srcParentNode.iterchildren():
    # statistics node
    name = _NODE_DISPLAY_NAMES.get(e.tag)
    if name is None:
        # Only parse element/sequence/item
        print(" Unsupported element type: %s " % (e.tag,))
        continue

7.加入子节点到尾部. append()

def getXmlElement(nodeName, parentNode):
    """Return the child of ``parentNode`` named ``nodeName``, creating it if absent.

    The lookup evaluates the XPath ``./<nodeName>`` on ``parentNode``. When
    it matches, the first match is returned; otherwise a new element named
    ``nodeName`` is appended to the end of ``parentNode`` and returned.

    Args:
        nodeName: Tag name of the child to find or create.
        parentNode: Element to search in and, if needed, append to.

    Returns:
        The existing or newly created child element.

    Raises:
        ValueError: If ``parentNode`` is ``None``.
    """
    if parentNode is None:
        raise ValueError("parent node is None")

    nodes = parentNode.xpath('./' + nodeName)
    if nodes:
        return nodes[0]

    node = etree.Element(nodeName)
    parentNode.append(node)
    return node

8.格式化成str输出

# Serialize through the owning tree rather than the bare root element, so
# that nodes placed in front of the root with addprevious() (such as a
# leading comment) are part of the output.
etree.tostring(
    g_statisticsRoot.getroottree(),
    encoding="UTF-8",
    xml_declaration=True,
    pretty_print=True,
    with_comments=True,
)

9.保存成xml文件

# Write the serialized document to g_xmlName. The with-block closes the file
# even if serialization or writing raises. The owning tree is serialized
# (not just the root element) so that nodes placed in front of the root with
# addprevious() are written as well.
with open(g_xmlName, "wb") as statisticsResult:
    statisticsResult.write(
        etree.tostring(
            g_statisticsRoot.getroottree(),
            encoding="UTF-8",
            xml_declaration=True,
            pretty_print=True,
            with_comments=True,
        )
    )