python 解析xml

使用python解析xml

Python 自带了几种解析方法:xml.parsers.expat、xml.dom、xml.etree.ElementTree、xml.sax

这里介绍 xml.dom 和 xml.etree.ElementTree

示例文件: 

<?xml version="1.0" encoding="utf-8"?>
<parent id="top">
<!--中文注释-->
<child1 name="paul">Text goes here</child1>
<child2 name="fred">More text</child2>
<child3 name="小明" age="12"> 中文说明 </child3>
</parent>


 ElementTree解析

#!/usr/bin/python
# -*- coding: utf-8 -*-
#python file
#Filename:xtree.py

# etree 解析,注释会丢失
import xml.etree.ElementTree as xparser
import os

def FindNodeAndSetAttr(xml, nodeName, attrMap):
    """Print, then overwrite, attributes of the element found by nodeName.

    Args:
        xml: Object exposing ``find``; the script passes the parsed
            ElementTree.
        nodeName: Tag name / path handed to ``xml.find``.
        attrMap: Mapping of attribute name -> new value.

    Raises:
        AttributeError: If ``find`` yields None (nothing matched).
        KeyError: If a key of attrMap is not already an attribute of the
            element.
    """
    node = xml.find(nodeName)
    existing = node.attrib
    # First pass: echo the current values. Plain indexing (not .get) means a
    # missing attribute aborts before anything has been modified.
    for attr_name in attrMap:
        print(existing[attr_name])
    # Second pass: log each pair and apply it to the element.
    for attr_name, attr_value in attrMap.items():
        print("%s--%s" % (attr_name, attr_value))
        node.set(attr_name, attr_value)

# Show the working directory, since the input path below is relative to it.
print("current dir is %s" % os.getcwd())

# Attribute values to apply to <child3>.
params = {
    'name': '小明',
    'age': '12',
}

xfile = "./example.xml"
xml1 = xparser.parse(xfile)

FindNodeAndSetAttr(xml1, 'child3', params)
# Write the modified tree out as UTF-8 and include the XML declaration.
xml1.write('aa.xml', encoding='UTF-8', xml_declaration=True)


dom解析

#!/usr/bin/python
# -*- coding: utf-8 -*-
#python file
#Filename:xdom.py

# 使用dom可以保留注释
# 但是编码指令会丢失,需要手动写回去
import xml.dom.minidom as xparser
import os
import shutil

# Find a node and set some attributes on it
def FindNodeAndSetAttr(xml, nodeName, attrMap):
    """Overwrite existing attributes on the first element named nodeName.

    Args:
        xml: Object exposing ``getElementsByTagName``; the script passes the
            parsed minidom document.
        nodeName: Tag name to look up.
        attrMap: Mapping of attribute name -> new value.

    Raises:
        IndexError: If no element with that tag name exists.
        AssertionError: If the element's nodeName differs from nodeName, or
            if any key of attrMap is not already an attribute of the element.
    """
    target = xml.getElementsByTagName(nodeName)[0]
    assert target.nodeName == nodeName
    # Every attribute must already exist; verify all of them before changing
    # anything so a failure leaves the element untouched.
    assert all(target.hasAttribute(attr_name) for attr_name in attrMap)
    for attr_name, attr_value in attrMap.items():
        print("%s--%s" % (attr_name, attr_value))
        target.setAttribute(attr_name, attr_value)

# Write the XML document to a file encoded as UTF-8
def WriteAsUTF8File(xmlparser, fileName):
    """Serialize xmlparser to fileName as UTF-8, prefixed with a BOM.

    ``toxml()`` called without an encoding emits a declaration that carries
    no encoding attribute, so the leading declaration is swapped for one that
    states UTF-8, followed by a newline.

    Args:
        xmlparser: Object exposing ``toxml()``; the script passes the parsed
            minidom document.
        fileName: Path of the file to create or overwrite.

    Raises:
        AssertionError: If the serialized text does not start with the bare
            ``<?xml version="1.0" ?>`` declaration.
    """
    bare_decl = '<?xml version="1.0" ?>'
    utf8_decl = '<?xml version="1.0" encoding="UTF-8" ?>\n'

    xmlstr = xmlparser.toxml()
    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # the exception type is kept for existing callers.
    if not xmlstr.startswith(bare_decl):
        raise AssertionError(
            'serialized XML does not start with %s' % bare_decl)
    # Swap only the leading declaration. A blanket str.replace would also
    # rewrite the same text wherever it appears later, e.g. inside a comment
    # or CDATA section.
    xmlstr = utf8_decl + xmlstr[len(bare_decl):]

    utf8bytes = b'\xEF\xBB\xBF' + xmlstr.encode('utf-8')
    # `with` guarantees the handle is closed even if the write fails.
    with open(fileName, 'wb') as fout:
        fout.write(utf8bytes)

# Remove a directory tree, reporting (not raising) OS-level failures
def myrmtree(dirtree):
    """Delete the directory tree at dirtree, printing any OS error.

    Args:
        dirtree: Path of the directory to remove.

    Returns:
        None. A failure inside ``shutil.rmtree`` is printed and swallowed.
    """
    try:
        shutil.rmtree(dirtree)
    # OSError rather than WindowsError: the latter name exists only on
    # Windows, so elsewhere the except clause itself raised NameError.
    # On Windows under Python 3, WindowsError is the same class as OSError.
    except OSError as e:
        print(e)
    
# Show the working directory, since the paths below are relative to it.
print("current dir is %s" % (os.getcwd()))

# Attribute values to write onto <child3>.
params = {'name':'小明',
          'age':'12'}

cfg = "./example.xml"
cfg_bak = cfg + "_bak"
print("=====%s=======" % (cfg))
# An existing backup file is treated as "already processed": stop here.
if os.path.exists(cfg_bak):
    print("%s has been processed" % (cfg))
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the site module for interactive use and is not available
    # under `python -S` or in frozen builds.
    raise SystemExit(-1)
# Rename the original to the backup name, then parse the backup.
shutil.move(cfg, cfg_bak)
xml1 = xparser.parse(cfg_bak)
FindNodeAndSetAttr(xml1, 'child3', params)
WriteAsUTF8File(xml1, 'bb.xml')


输出

current dir is I:\新建文件夹 (2)
12
小明
age--12
name--小明
current dir is I:\新建文件夹 (2)
=====./example.xml=======
age--12
name--小明


 

原文地址:https://www.cnblogs.com/xkxjy/p/3672266.html