XML的两种解析方式

JDK提供的XML解析方式分为两种:DOM方式和SAX方式
DOM:Document Object Model。需要读取整个XML文档,并先在内存中构建代表整个DOM树的Document对象,之后可以进行随机访问。文档较大时需要考虑内存占用。适合增删改。
SAX:Simple API for XML。采用事件驱动的方式解析XML文件,边读边对文档进行处理.适合读取

其他的xml解析包:Dom4J, PullParser(安卓)

Dom4J底层使用SAX解析器读取文档, 但会在内存中构建完整的文档树, API用法类似DOM方式

DOM @JDK

/**
 * JUnit demonstrations of the JDK DOM API: parsing {@code src/book.xml},
 * reading text and attributes, adding and deleting nodes, and walking the
 * tree recursively.
 */
public class DomTest {

    /**
     * Parses {@code src/book.xml} and returns the resulting DOM document.
     *
     * @return the parsed document
     * @throws ParserConfigurationException if a builder cannot be created
     * @throws SAXException if the file is not well-formed XML
     * @throws IOException if the file cannot be read
     */
    private Document getDocument() throws ParserConfigurationException,
            SAXException, IOException {
        // 1. Obtain the factory.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // 2. Obtain a builder from the factory.
        DocumentBuilder builder = factory.newDocumentBuilder();
        // 3. Parse the XML file into a Document.
        return builder.parse("src/book.xml");
    }

    /**
     * Writes the in-memory document back to {@code src/book.xml}.
     *
     * @param document the document to serialize
     * @throws TransformerException if the transformer cannot be created or
     *         the serialization fails
     */
    private void writeBack2Xml(Document document) throws TransformerException {
        TransformerFactory factory = TransformerFactory.newInstance();
        Transformer transformer = factory.newTransformer();
        transformer.transform(new DOMSource(document), new StreamResult("src/book.xml"));
    }

    /** Reads the text content of the first {@code 书} element. */
    @Test
    public void testReadContent() throws Exception {
        Document document = getDocument();
        // Look up the elements by tag name.
        NodeList nodeList = document.getElementsByTagName("书");
        System.out.println("长度 : " + nodeList.getLength());
        // Take the first match and print its text content.
        Node firstNode = nodeList.item(0);
        String result = firstNode.getTextContent();
        System.out.println(result);
    }

    /** Reads the {@code 出版社} attribute of the first {@code 书} element. */
    @Test
    public void testReadAttribute() throws Exception {
        Document document = getDocument();
        NodeList nodeList = document.getElementsByTagName("书");
        // Only cast once the node is confirmed to be an element
        // (this also covers the case where item(0) is null).
        Node node = nodeList.item(0);
        if (node instanceof Element) {
            Element firstElement = (Element) node;
            String result = firstElement.getAttribute("出版社");
            System.out.println(result);
        }
    }

    /** Appends a new {@code 售价} element to the first {@code 书} element and saves the file. */
    @Test
    public void testAddPrice() throws Exception {
        Document document = getDocument();
        Node firstNode = document.getElementsByTagName("书").item(0);
        Element newElement = document.createElement("售价");
        newElement.setTextContent("79.00元");
        firstNode.appendChild(newElement);
        writeBack2Xml(document);  // persist the change
    }

    /** Deletes every {@code 售价} element whose text is {@code 39.00元} and saves the file. */
    @Test
    public void testDelete() throws Exception {
        Document document = getDocument();
        NodeList priceNodeList = document.getElementsByTagName("售价");
        // The NodeList is live: removing a node shrinks it and shifts the
        // following nodes down by one. Iterating from the end keeps the
        // indexes of the not-yet-visited nodes stable so none are skipped.
        for (int i = priceNodeList.getLength() - 1; i >= 0; i--) {
            Node node = priceNodeList.item(i);
            if ("39.00元".equals(node.getTextContent())) {
                // Remove the child through its parent.
                node.getParentNode().removeChild(node);
            }
        }
        writeBack2Xml(document);
    }

    /** Prints the name of every element node in the document. */
    @Test
    public void testPrintAllElementsName() throws Exception {
        Document document = getDocument();
        // Recursive print starting at the document node.
        printAllElementsName(document);
    }

    /**
     * Prints the name of {@code node} if it is an element, then recurses
     * into all of its children.
     *
     * @param node the root of the subtree to print
     */
    public void printAllElementsName(Node node) {
        // Print this node.
        if (Node.ELEMENT_NODE == node.getNodeType()) {
            System.out.println(node.getNodeName());
        }
        // Process the children; the recursion ends when a node has no children.
        NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node item = childNodes.item(i);
            printAllElementsName(item);
        }
    }
}

SAX @JDK

/**
 * Entry point that parses {@code src/book.xml} with the JDK SAX parser,
 * delivering the parse events to a {@code MyDefaultHandler}.
 */
public class SaxTest {

    public static void main(String[] args) throws Exception, SAXException {
        // Factory -> parser -> underlying XMLReader.
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();

        // Register the callback object, then start the event-driven parse.
        xmlReader.setContentHandler(new MyDefaultHandler());
        xmlReader.parse("src/book.xml");
    }
}

/**
 * SAX handler that prints the text of one specific {@code 售价} element.
 *
 * <p>A SAX handler is called by the parser and does not control the flow
 * itself, so it tracks its position with a flag and a counter. The counter
 * starts at 1 and is incremented on every {@code 售价} start tag, so it
 * equals {@link #TARGET_COUNT} while the parser is inside the second
 * {@code 售价} element of the document.
 */
class MyDefaultHandler extends DefaultHandler {

    /** Value of {@link #count} while inside the element whose text is printed. */
    private static final int TARGET_COUNT = 3;

    /** True while the parser is between a {@code 售价} start tag and its end tag. */
    private boolean isPrice = false;

    /** 1 plus the number of {@code 售价} start tags seen so far. */
    private int count = 1;

    /** Text collected for the current target element. */
    private final StringBuilder priceText = new StringBuilder();

    /**
     * Marks entry into a {@code 售价} element and resets the text buffer.
     *
     * @param qName the qualified tag name, which is what this handler matches on
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if ("售价".equals(qName)) {
            isPrice = true;
            count++;
            priceText.setLength(0);
        }
    }

    /**
     * On leaving a {@code 售价} element, prints the buffered text once if
     * this was the target element and any text was received.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if ("售价".equals(qName)) {
            if (isPrice && count == TARGET_COUNT && priceText.length() > 0) {
                System.out.println("文本: " + priceText);
            }
            isPrice = false;
        }
    }

    /**
     * Collects character data of the target element. A parser may deliver
     * one text node in several calls, so the chunks are appended here and
     * printed as a whole in {@link #endElement}.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (isPrice && count == TARGET_COUNT) {
            priceText.append(ch, start, length);
        }
    }
}

Dom4J

/**
 * JUnit demonstrations of the dom4j API: reading {@code src/book.xml},
 * reading text and attributes, adding and deleting elements, and writing
 * the document back to disk.
 */
public class Dom4JTest {

    /**
     * Reads {@code src/book.xml} with a {@code SAXReader} and returns the
     * document object that represents the file.
     *
     * @return the parsed document
     * @throws DocumentException if the file cannot be read or parsed
     */
    private Document getDocument() throws DocumentException {
        SAXReader reader = new SAXReader();
        return reader.read("src/book.xml");
    }

    /**
     * Writes the document back to {@code src/book.xml}, pretty-printed and
     * encoded as UTF-8.
     *
     * @param document the document to serialize
     * @throws UnsupportedEncodingException if the writer rejects the encoding
     * @throws FileNotFoundException if the target file cannot be opened
     * @throws IOException if writing or closing fails
     */
    private void writeBack2Xml(Document document) throws UnsupportedEncodingException, FileNotFoundException, IOException {
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");
        FileOutputStream out = new FileOutputStream("src/book.xml");
        try {
            XMLWriter writer = new XMLWriter(out, format);
            writer.write(document);
            // Closing the writer flushes its buffered output to the stream.
            writer.close();
        } finally {
            // Release the file handle even if constructing the writer or
            // writing failed; a second close after writer.close() is harmless.
            out.close();
        }
    }

    /**
     * Reads the text of a nested element by walking down from the root one
     * level at a time with {@code element(name)}.
     */
    @Test
    public void testReadContent() throws Exception {
        Document document = getDocument();

        Element rootElement = document.getRootElement();
        Element firstLevelElement = rootElement.element("ele1");
        Element secondLevelElement = firstLevelElement.element("ele2");

        String value = secondLevelElement.getText();
        System.out.println(value);
    }

    /** Reads the {@code 出版社} attribute of the second {@code 书} element. */
    @Test
    public void testReadAttribute() throws Exception {
        Document document = getDocument();

        Element rootElement = document.getRootElement();

        List<Element> list = rootElement.elements("书");
        Element secondElement = list.get(1);
        Attribute attribute = secondElement.attribute("出版社");

        String value = attribute.getValue();
        System.out.println(value);
    }

    /** Adds a new child element to the second {@code 书} element and saves the file. */
    @Test
    public void testAddPrice() throws Exception {
        Document document = getDocument();

        Element rootElement = document.getRootElement();
        Element secondBookElement = (Element) rootElement.elements("书").get(1);
        // Create the new child and set its text in one step.
        secondBookElement.addElement("newEle").setText("this is new Element");

        writeBack2Xml(document);
    }

    /** Removes the first {@code 售价} child of the second {@code 书} element and saves the file. */
    @Test
    public void testDeletePrice() throws Exception {
        Document document = getDocument();
        Element rootElement = document.getRootElement();
        Element secondBookElement = (Element) rootElement.elements("书").get(1);
        Element targetBookPrice = (Element) secondBookElement.elements("售价").get(0);

        // Obtain the parent, then remove this child from it.
        targetBookPrice.getParent().remove(targetBookPrice);
        writeBack2Xml(document);
    }
}
原文地址:https://www.cnblogs.com/myJavaEE/p/6685361.html