xml--通过DOM解析XML

本文通过3个例子演示使用DOM方式解析XML的用法。

通过DOM解析XML必须要写的3行代码.

step 1: 获得dom解析器工厂（工厂的作用是用于创建具体的解析器）

step 2:获得具体的dom解析器

step 3: 解析一个xml文档，获得Document对象（根结点）

第一个例子通过元素的标签名（如 NAME、ADDRESS），取出对应元素的文本值。

例子1：

package com.text.tool;

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 * Example 1: parses {@code candidate.xml} with DOM and prints the name,
 * address, telephone, fax and e-mail of every {@code PERSON} element.
 */
public class DomTest1 {
	public static void main(String[] args) throws Exception {
		// Step 1: obtain the DOM parser factory (the factory creates concrete parsers).
		DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

		// Step 2: obtain a concrete DOM parser.
		DocumentBuilder db = dbf.newDocumentBuilder();

		// Step 3: parse the XML document and obtain the Document object.
		Document document = db.parse(new File("candidate.xml"));

		NodeList list = document.getElementsByTagName("PERSON");

		for (int i = 0; i < list.getLength(); i++) {
			Element person = (Element) list.item(i);

			System.out.println("name:" + childText(person, "NAME"));
			System.out.println("address:" + childText(person, "ADDRESS"));
			System.out.println("tel:" + childText(person, "TEL"));
			System.out.println("fax:" + childText(person, "FAX"));
			System.out.println("email:" + childText(person, "EMAIL"));

			System.out.println("--------------------------------------");
		}
	}

	/**
	 * Returns the text of the first descendant of {@code parent} named
	 * {@code tagName}.
	 *
	 * @param parent  element to search in
	 * @param tagName tag name of the wanted descendant
	 * @return the descendant's text content, or an empty string when no such
	 *         element exists or it has no text
	 */
	private static String childText(Element parent, String tagName) {
		NodeList matches = parent.getElementsByTagName(tagName);

		// item(0) is null when the tag is absent; guard instead of dereferencing it.
		if (matches.getLength() == 0) {
			return "";
		}

		// getTextContent() yields "" for an empty element such as <FAX/>, whereas
		// getFirstChild() would be null there.
		return matches.item(0).getTextContent();
	}
}

xml:

注意：以下xml内容要保存为 candidate.xml，并放到工程的根目录下（即程序运行时的当前工作目录），不要放在src目录下，否则 new File("candidate.xml") 找不到该文件。

<?xml version="1.0"?>
<PEOPLE>
	<PERSON PERSONID="E01">
		<NAME>Tony Blair</NAME>
		<ADDRESS>10 Downing Street, London, UK</ADDRESS>
		<TEL>(061) 98765</TEL>
		<FAX>(061) 98765</FAX>
		<EMAIL>blair@everywhere.com</EMAIL>
	</PERSON>
	<PERSON PERSONID="E02">
		<NAME>Bill Clinton</NAME>
		<ADDRESS>White House, USA</ADDRESS>
		<TEL>(001) 6400 98765</TEL>
		<FAX>(001) 6400 98765</FAX>
		<EMAIL>bill@everywhere.com</EMAIL>
	</PERSON>
	<PERSON PERSONID="E03">
		<NAME>Tom Cruise</NAME>
		<ADDRESS>57 Jumbo Street, New York, USA</ADDRESS>
		<TEL>(001) 4500 67859</TEL>
		<FAX>(001) 4500 67859</FAX>
		<EMAIL>cruise@everywhere.com</EMAIL>
	</PERSON>
	<PERSON PERSONID="E04">
		<NAME>Linda Goodman</NAME>
		<ADDRESS>78 Crax Lane, London, UK</ADDRESS>
		<TEL>(061) 54 56789</TEL>
		<FAX>(061) 54 56789</FAX>
		<EMAIL>linda@everywhere.com</EMAIL>
	</PERSON>
</PEOPLE>

运行结果：

name:Tony Blair
address:10 Downing Street, London, UK
tel:(061) 98765
fax:(061) 98765
email:blair@everywhere.com
--------------------------------------
name:Bill Clinton
address:White House, USA
tel:(001) 6400 98765
fax:(001) 6400 98765
email:bill@everywhere.com
--------------------------------------
name:Tom Cruise
address:57 Jumbo Street, New York, USA
tel:(001) 4500 67859
fax:(001) 4500 67859
email:cruise@everywhere.com
--------------------------------------
name:Linda Goodman
address:78 Crax Lane, London, UK
tel:(061) 54 56789
fax:(061) 54 56789
email:linda@everywhere.com
--------------------------------------

下面的例子2遍历根元素的子节点，依次输出各子节点的名称、类型、节点值和文本内容，并在最后一个for循环中输出每个“学生”元素的属性名称和对应值。

另外要注意，Node.getNodeType()用于获取当前节点的类型，API中为不同的节点类型定义了不同的常量（例如 ELEMENT_NODE 为 1、TEXT_NODE 为 3），详见API。
例子2：

package com.text.tool;

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Example 2: parses {@code student.xml}, prints the name, type, value and
 * text content of every direct child of the root element, then prints one
 * attribute of every {@code 学生} element as {@code name=value}.
 */
public class DomTest2 {
	public static void main(String[] args) throws Exception {
		DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

		DocumentBuilder db = dbf.newDocumentBuilder();

		Document doc = db.parse(new File("student.xml"));

		// System.out.println(doc.getXmlEncoding());
		// System.out.println(doc.getXmlVersion());
		// System.out.println(doc.getXmlStandalone());

		// Obtain the root element of the document.
		Element root = doc.getDocumentElement();

		System.out.println(root.getTagName());

		// Direct children only; whitespace between elements shows up as text nodes.
		NodeList list = root.getChildNodes();

		System.out.println(list.getLength());

		// Pass 1: node names.
		for (int i = 0; i < list.getLength(); i++) {
			System.out.println(list.item(i).getNodeName());
		}

		System.out.println("----------------------------------");

		// Pass 2: numeric node type and node value.
		for (int i = 0; i < list.getLength(); i++) {
			Node n = list.item(i);

			System.out.println(n.getNodeType() + " : " + n.getNodeValue());
		}

		System.out.println("----------------------------------");

		// Pass 3: concatenated text of each node and its descendants.
		for (int i = 0; i < list.getLength(); i++) {
			Node n = list.item(i);

			System.out.println(n.getTextContent());
		}

		System.out.println("----------------------------------");

		NodeList nodeList = doc.getElementsByTagName("学生");

		for (int i = 0; i < nodeList.getLength(); i++) {
			NamedNodeMap nnm = nodeList.item(i).getAttributes();

			// item(0) is null for an element without attributes; skip it rather
			// than fail with a NullPointerException.
			if (nnm == null || nnm.getLength() == 0) {
				continue;
			}

			// Only the attribute at index 0 is printed.
			Node firstAttr = nnm.item(0);

			System.out.print(firstAttr.getNodeName());

			System.out.print("=");

			System.out.println(firstAttr.getNodeValue());
		}

	}
}

XML:

<?xml version="1.0" encoding="utf-8"?>
<学生名册 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="student.xsd" >
	<学生 学号="1">
		<姓名>张三</姓名>
		<性别>男</性别>
		<年龄>20</年龄>
	</学生>
	<学生 学号="2">
		<姓名>李四</姓名>
		<性别>女</性别>
		<年龄>19</年龄>
	</学生>
	<学生 学号="3">
		<姓名>王五</姓名>
		<性别>男</性别>
		<年龄>21</年龄>
	</学生>
</学生名册>

输出结果：

学生名册
7
#text
学生
#text
学生
#text
学生
#text
----------------------------------
3 : 
	
1 : null
3 : 
	
1 : null
3 : 
	
1 : null
3 : 

----------------------------------

	

		张三
		男
		20
	

	

		李四
		女
		19
	

	

		王五
		男
		21
	


----------------------------------
学号=1
学号=2
学号=3

在例子2的基础上又做了修改

例子3

package com.text.tool;

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Attr;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Recursively walks a given XML document and prints its content (elements,
 * attributes, text and comments) to the command line.
 *
 * @author zhanglong
 *
 */
public class DomTest3 {
	public static void main(String[] args) throws Exception {
		DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
		DocumentBuilder db = dbf.newDocumentBuilder();

		Document doc = db.parse(new File("student.xml"));
		// Obtain the root element node.
		Element root = doc.getDocumentElement();

		parseElement(root);
	}

	/**
	 * Prints {@code element} as markup: the start tag with its attributes,
	 * then each child (elements recursively, text verbatim, comments wrapped
	 * in {@code <!-- -->}), then the end tag. Other node types are not printed.
	 *
	 * @param element the element to print
	 */
	private static void parseElement(Element element) {
		String tagName = element.getNodeName();

		NodeList children = element.getChildNodes();

		System.out.print("<" + tagName);

		// NamedNodeMap holding all attributes of this element; checked before use.
		NamedNodeMap map = element.getAttributes();

		// If the element has attributes, print each one.
		if (null != map) {
			for (int i = 0; i < map.getLength(); i++) {
				// Fetch each attribute of the element.
				Attr attr = (Attr) map.item(i);

				String attrName = attr.getName();
				String attrValue = attr.getValue();

				// The quotes must be escaped inside the literals; without the
				// backslashes this statement does not compile.
				System.out.print(" " + attrName + "=\"" + attrValue + "\"");
			}
		}

		System.out.print(">");

		for (int i = 0; i < children.getLength(); i++) {
			Node node = children.item(i);
			// Determine the kind of node.
			short nodeType = node.getNodeType();

			if (nodeType == Node.ELEMENT_NODE) {
				// An element: keep recursing.
				parseElement((Element) node);
			} else if (nodeType == Node.TEXT_NODE) {
				// Recursion base case: plain text is printed as-is.
				System.out.print(node.getNodeValue());
			} else if (nodeType == Node.COMMENT_NODE) {
				System.out.print("<!--");

				Comment comment = (Comment) node;

				// The comment's content.
				String data = comment.getData();

				System.out.print(data);

				System.out.print("-->");
			}
		}

		System.out.print("</" + tagName + ">");
	}
}

XML同例子2一致。

输出结果：

<学生名册 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="student.xsd">
	<学生 学号="1">
		<姓名>张三</姓名>
		<性别>男</性别>
		<年龄>20</年龄>
	</学生>
	<学生 学号="2">
		<姓名>李四</姓名>
		<性别>女</性别>
		<年龄>19</年龄>
	</学生>
	<学生 学号="3">
		<姓名>王五</姓名>
		<性别>男</性别>
		<年龄>21</年龄>
	</学生>
</学生名册>

以上3个例子来源《北京圣思园XML培训视频》。