xml 模块

XML(可扩展标记语言)是一种通用的数据格式,常用于在不同语言或程序之间交换数据

对于xml的操作有如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

'''
xml的解析方法有两种;
一是通过XML() 转换
二是通过parse()转换
两者不同之处在于xml转换的是element对象,parse转换的是elementtree对象
'''

from xml.etree import ElementTree as ET

# xml
'''str_xml = open('first.xml','r',encoding='utf-8').read()
root = ET.XML(str_xml)
print(root,type(root))
#result = <Element 'DataSet' at 0x00000201C02B7598> <class 'xml.etree.ElementTree.Element'>

#parse

tree = ET.parse('first.xml')
print(tree,type(tree))
#echo result = <xml.etree.ElementTree.ElementTree object at 0x00000218DAC9C0B8> <class 'xml.etree.ElementTree.ElementTree'>

root = tree.getroot()
print(root,type(root))
#echo result = <Element 'DataSet' at 0x000001F1DACCC048> <class 'xml.etree.ElementTree.Element'>
'''

#------------------------------------------------------------------------
'''节点都拥有的共同属性
tag 节点的名字
attrib 节点的属性
text 节点的值

节点的方法:
find(self,path,namespaces=None) 查找第一个匹配的子节点
makeelement(self,tag,attrib) 创建一个新节点
copy(self) 返回当前节点的copy
append(self,subelement) 为当前节点追加一个子节点
extend(self,elements) 为当前节点扩展n个子节点
insert(self,index,subelement) 在当前节点的指定位置插入新的子节点
remove(self,subelement) 在当前节点中删除某个子节点
findtext(self,path,default=None,namespaces=None) 查找第一个匹配的子节点的值
findall(self,path,namespaces=None) 获取所有匹配的子节点
iterfind(self,path,namespaces=None) 获取所有匹配的节点并返回一个迭代器
clear(self) 清空节点
get(self,key,default=None) 获取当前节点的属性值
set(self,key,value) 为当前节点设置属性值
keys(self) 获取当前节点的所有属性的key
items(self) 获取当前节点的所有属性,每一个属性都是一个(键,值)对
iter(self,tag=None) 在当前节点及其所有后代节点中查找tag指定的节点,并返回一个迭代器
itertext(self) 遍历当前节点及其所有后代节点中的文本内容,并返回一个迭代器
'''

#遍历xml

'''tree = ET.parse('first.xml')
root = tree.getroot()

for i in root:
print(i.tag,i.attrib,i.text)
for j in i:
print(j.tag,j.attrib,j.text)'''

#遍历指定的节点
'''tree = ET.parse('first.xml')
root = tree.getroot()

for i in root.iter('test'):
print(i.text)'''

# Modify / add / delete / query the attributes and values of nodes.
# Parse new_first.xml once: `tree` is the ElementTree, `root` its root element.
tree = ET.parse('new_first.xml')
root = tree.getroot()

#增加 属性:值
#attrib for all
'''for i in root.iter('test'):
i.set('channel','m')
tree.write('new_first.xml',encoding='utf-8')'''

#for one
'''for i in root.iter('test'):
if i.text == 'test1':
i.text = 'success'
tree.write('new_first.xml',encoding='utf-8')'''

#删除属性:值
#for all
'''for i in root.iter('test'):
del i.attrib['channel']
tree.write('new_first.xml',encoding='utf-8')'''

#修改属性:值
#for all
'''for i in root.iter('test'):
i.set('channel','M')
tree.write('new_first.xml',encoding='utf-8')'''

#查询属性:值
#for all
'''for i in root.iter('test'):
print(i.attrib)'''

#节点的删除和增加
#for all
'''for i in root.findall('country'):
vle = i.find('test').text
if vle == 'success':
root.remove(i)
tree.write('new_first.xml',encoding='utf-8')'''

#xml 文档的创建,三种方式:如下
# One
'''#创建根节点
root = ET.Element('family')

#儿子节点
son1 = ET.Element('son1',{'name':'java'})
son2 = ET.Element('son2',{'name':'python'})

#孙子节点
grandson1 = ET.Element('grandson1',{'name':'c++'})
grandson2 = ET.Element('grandson2',{'name':'c'})

#把儿子添加到根节点下
root.append(son1)
root.append(son2)

#把孙子添加到儿子当中
son1.append(grandson1)
son2.append(grandson2)

#创建elementTree 对象
tree = ET.ElementTree(root)
#写入到文件中:
tree.write('test.xml',encoding='utf-8',xml_declaration=True,short_empty_elements=True)
'''

# two
'''root = ET.Element('family')

son1 = root.makeelement('son1',{'name':'java'})
son2 = root.makeelement('son2',{'name':'python'})

grandson1 = son1.makeelement('grandson1',{'name':'c++'})
grandson2 = son2.makeelement('grandson2',{'name':'c'})

root.append(son1)
root.append(son2)

son1.append(grandson1)
son2.append(grandson2)

tree = ET.ElementTree(root)
tree.write('test1.xml',encoding='utf-8')'''

# Approach three: ET.SubElement creates a child and attaches it to its
# parent in a single call, so no explicit append() is needed.
root = ET.Element('family')

# Second generation: attached directly under the root element.
son1 = ET.SubElement(root, 'son1', attrib={'name': 'java'})
son2 = ET.SubElement(root, 'son2', attrib={'name': 'python'})

# Third generation: one grandchild under each son.
grandson1 = ET.SubElement(son1, 'grandson1', attrib={'name': 'c++'})
grandson2 = ET.SubElement(son2, 'grandson2', attrib={'name': 'c'})

#tree = ET.ElementTree(root)
#tree.write('test2.xml',encoding='utf-8')

#如果需要进行缩进处理,需要引入 xml.dom 的minidom类

from xml.dom import minidom
def prettify(node: ET.Element) -> str:
    """Return *node* serialized as an indented XML string.

    The element is serialized to UTF-8 bytes with ``ET.tostring``, re-parsed
    with ``minidom.parseString`` and rendered with ``toprettyxml`` using a
    one-space indent per nesting level.

    Args:
        node: The ElementTree element to serialize.

    Returns:
        The text produced by ``toprettyxml(indent=' ')``.
    """
    # ET.tostring() emits no indentation itself, so round-trip the bytes
    # through minidom, whose toprettyxml() adds it.
    fix_str = ET.tostring(node, 'utf-8')
    reparsed = minidom.parseString(fix_str)
    return reparsed.toprettyxml(indent=' ')

# Render the tree held in `root` with indentation and save it to test3.xml.
raw_str = prettify(root)

# The with-block closes the file even if the write raises.
with open('test3.xml', 'w', encoding='utf-8') as f:
    f.write(raw_str)


##########################################################
关于命名空间

命名冲突

在 XML 中,元素名称是由开发者定义的,当两个不同的文档使用相同的元素名时,就会发生命名冲突。XML 命名空间通过把元素名和属性名与一个 URI 关联起来避免这种冲突;下面的例子用 register_namespace() 为该 URI 指定序列化时使用的前缀 com。

from xml.etree import ElementTree as ET

# Bind the prefix 'com' to the company namespace URI for serialization.
ET.register_namespace('com', "http://www.company.com")

# Build the tree: a namespaced root with one namespaced child.
root = ET.Element("{http://www.company.com}STUFF")
body = ET.SubElement(root, "{http://www.company.com}MORE_STUFF")
# The attribute name is namespace-qualified as well.
body.set("{http://www.company.com}hhh", "123")
body.text = "STUFF EVERYWHERE!"

# Wrap the root in an ElementTree so it can be written out as a document.
tree = ET.ElementTree(root)

tree.write(
    "page.xml",
    encoding='utf-8',
    xml_declaration=True,
    method="xml",
)
原文地址:https://www.cnblogs.com/zxcv-/p/7732085.html