python处理xml文件

参考：https://docs.python.org/2/library/xml.etree.elementtree.html

例子：

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

1、解析xml文件

>>> import os
>>> os.getcwd()
'D:\\workspace\\testpython'
>>> import xml.etree.ElementTree as ET
>>> tree = ET.parse('test.xml')
>>> root = tree.getroot()
>>> print root
<Element 'data' at 0x1d2a8b0>
>>> print tree
<xml.etree.ElementTree.ElementTree object at 0x01D2A9D0>
>>> root.tag
'data'
>>> root.attrib
{}
>>> #遍历子节点
>>> for child in root:
    print child.tag,child.attrib

    
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}
>>> root[0].text
'\n        '
>>> root[0][1].text
'2008'
>>> root[1][3].text  #neighbor是空元素，text为None，交互环境下不显示任何输出
>>> root[1][2].text
'59900'

2、查找元素：root.iter()迭代，element.findall()，element.find()，element.get()，element.text

>>> #查询元素
>>> for neighbor in root.iter('neighbor'):
    print neighbor.attrib

    
{'direction': 'E', 'name': 'Austria'}
{'direction': 'W', 'name': 'Switzerland'}
{'direction': 'N', 'name': 'Malaysia'}
{'direction': 'W', 'name': 'Costa Rica'}
{'direction': 'E', 'name': 'Colombia'}
>>> root.iter('neighbor')
<generator object iter at 0x01D3CF30>
>>> root.findall('country')
[<Element 'country' at 0x1d2aa90>, <Element 'country' at 0x1d2ad30>, <Element 'country' at 0x1d2af10>]
>>> for country in root.findall('country'): #element.findall()查询当前元素的子元素
    rank=country.find('rank').text  #element.find()查询指定标签的第一个子元素，element.text获取元素的内容
    name=country.get('name')  #element.get()获取元素的属性值
    print name,rank

    
Liechtenstein 1
Singapore 4
Panama 68

3、修改xml文件：element.set()修改属性，element.append()增加子元素，element.remove()删除元素

修改元素属性：

>>> for rank in root.iter('rank'):
    new_rank = int(rank.text) + 1
    rank.text=str(new_rank)
    rank.set('updated','yes')

    
>>> tree.write('test.xml')

删除元素：

>>> for country in root.findall('country'):
    rank = int(country.find('rank').text)
    if rank>50:
        root.remove(country)

        
>>> tree.write('test.xml')

修改并保存后的xml文件内容：

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
</data>

4、构建xml文件：SubElement()

>>> a = ET.Element('a')
>>> b = ET.SubElement(a, 'b')
>>> c = ET.SubElement(a, 'c')
>>> d = ET.SubElement(c, 'd')
>>> ET.dump(a)
<a><b /><c><d /></c></a>
>>>

5、用命名空间来解析xml文档

If the XML input has namespaces, tags and attributes with prefixes in the form prefix:sometag get expanded to {uri}sometag where the prefix is replaced by the full URI. Also, if there is a default namespace, that full URI gets prepended to all of the non-prefixed tags.

Here is an XML example that incorporates two namespaces, one with the prefix “fictional” and the other serving as the default namespace:

<?xml version="1.0"?>
<actors xmlns:fictional="http://characters.example.com"
        xmlns="http://people.example.com">
    <actor>
        <name>John Cleese</name>
        <fictional:character>Lancelot</fictional:character>
        <fictional:character>Archie Leach</fictional:character>
    </actor>
    <actor>
        <name>Eric Idle</name>
        <fictional:character>Sir Robin</fictional:character>
        <fictional:character>Gunther</fictional:character>
        <fictional:character>Commander Clement</fictional:character>
    </actor>
</actors>

>>> import xml.etree.ElementTree as ET
>>> tree=ET.parse('namespace.xml')
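>>> root=tree.getroot()
>>> #定义前缀到命名空间URI的映射，传给find()/findall()使用（前缀名可自定义，不必与xml文件中的前缀一致）
>>> ns = {'real_person': 'http://people.example.com', 'role': 'http://characters.example.com'}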
>>> for actor in root.findall('real_person:actor', ns):
    name = actor.find('real_person:name', ns)
    print name.text
    for char in actor.findall('role:character', ns):
        print ' |-->', char.text

        
John Cleese
 |--> Lancelot
 |--> Archie Leach
Eric Idle
 |--> Sir Robin
 |--> Gunther
 |--> Commander Clement
>>>

定位、编辑、保存元素属性（注意：getchildren()自Python 2.7起已弃用，并在Python 3.9中被移除，建议改用list(elem)或直接用下标访问，例如root[2][1]）：

>>> import xml.etree.ElementTree as ET
>>> tree=ET.parse('spring-subtract.xml')
>>> root=tree.getroot()
>>> print root
<Element '{http://www.springframework.org/schema/beans}beans' at 0x1d423f0>
>>> print root.getchildren()
[<Element '{http://www.springframework.org/schema/beans}bean' at 0x1d422f0>, <Element '{http://www.springframework.org/schema/beans}bean' at 0x1d422d0>, <Element '{http://www.springframework.org/schema/beans}bean' at 0x1d425d0>]
>>> print root.getchildren()[2]
<Element '{http://www.springframework.org/schema/beans}bean' at 0x1d425d0>
>>> print root.getchildren()[2].getchildren()
[<Element '{http://www.springframework.org/schema/beans}property' at 0x1d42610>, <Element '{http://www.springframework.org/schema/beans}property' at 0x1d42670>]
>>> print root.getchildren()[2].getchildren()[1].attrib
{'name': 'cronExpression', 'value': '0000'}
>>> print root.getchildren()[2].getchildren()[1].attrib['value']
0000
>>> #编辑属性值
>>> root.getchildren()[2].getchildren()[1].set('value','222')
>>> tree.write('spring-subtract.xml')  #保存文件
>>> print root.getchildren()[2].getchildren()[1].attrib
{'name': 'cronExpression', 'value': '222'}
>>>