SAX解析xml (遍历DOM树各节点)

本文参考 http://yangjunfeng.iteye.com/blog/401377

1. books.xml

 1 <?xml version="1.0" encoding="UTF-8"?>
 2 <bk:books count="3" xmlns:bk="http://test.org/books">
 3     <!--books's comment -->
 4     <bk:book id="1">
 5         <bk:name>Thinking in JAVA</bk:name>
 6     </bk:book>
 7     <bk:book id="2">
 8         <bk:name>Core JAVA2</bk:name>
 9     </bk:book>
10     <bk:book id="3">
11         <bk:name>C++ primer</bk:name>
12         <addr:address xmlns:addr="http://test.org/address">
13             <addr:state>China</addr:state>
14             <addr:city>ShangHai</addr:city>
15         </addr:address>
16     </bk:book>
17 </bk:books> 

xml文件基本结构:

<books>Text文本节点("\n\t" 换行+制表符)

  <book>Text文本节点("\n\t\t" 换行+2个制表符)

    <name>Text文本节点("Thinking in JAVA")</name>Text文本节点("\n\t" 换行+制表符)

  </book>

.......

.......

</books>

2. 使用SAX api 解析xml

  1 package sax.parsing.example;
  2 
  3 import java.io.FileInputStream;
  4 import java.io.FileNotFoundException;
  5 import java.io.IOException;
  6 import java.io.Reader;
  7 
  8 import org.testng.annotations.Test;
  9 import org.xml.sax.Attributes;
 10 import org.xml.sax.ContentHandler;
 11 import org.xml.sax.InputSource;
 12 import org.xml.sax.Locator;
 13 import org.xml.sax.SAXException;
 14 import org.xml.sax.XMLReader;
 15 import org.xml.sax.helpers.XMLReaderFactory;
 16 
 17 import bsh.This;
 18 
 19 
 /**
  * SAX {@link ContentHandler} that traces every parser callback on {@code System.out}.
  *
  * <p>Each event is printed behind an indentation prefix derived from the current element
  * nesting depth (four spaces per level). Text events are printed with line breaks, tabs,
  * double quotes and backslashes escaped, so whitespace-only text nodes remain visible.
  */
 class MyContentHandler implements ContentHandler {

     /** Number of currently open elements; determines the indentation of the trace. */
     int frontBlankCount = 0;

     /**
      * Builds the indentation prefix for a nesting depth.
      *
      * @param count nesting depth; zero or a negative value yields an empty string
      * @return {@code count} repetitions of four spaces
      */
     public String toBlankString(int count) {
         StringBuilder buffer = new StringBuilder();
         for (int i = 0; i < count; i++) {
             buffer.append("    ");
         }
         return buffer.toString();
     }

     /**
      * Renders a slice of a character buffer with special characters made visible.
      *
      * <p>Backslash, carriage return, line feed, tab and double quote are replaced by their
      * two-character escape sequences; every other character is copied unchanged.
      *
      * @param ch     buffer handed over by the parser
      * @param start  index of the first character to render
      * @param length number of characters to render
      * @return the escaped text
      */
     private static String escape(char[] ch, int start, int length) {
         StringBuilder buffer = new StringBuilder();
         for (int i = start; i < start + length; i++) {
             switch (ch[i]) {
                 case '\\': buffer.append("\\\\"); break;
                 case '\r': buffer.append("\\r"); break;
                 case '\n': buffer.append("\\n"); break;
                 case '\t': buffer.append("\\t"); break;
                 case '"': buffer.append("\\\""); break;
                 default: buffer.append(ch[i]);
             }
         }
         return buffer.toString();
     }

     /**
      * Prints the position and identifiers reported by the locator, followed by a line break.
      *
      * @param locator locator supplied by the parser
      */
     @Override
     public void setDocumentLocator(Locator locator) {
         System.out.println(this.toBlankString(this.frontBlankCount)
                 + ">>> set document_locator : (lineNumber = " + locator.getLineNumber()
                 + ", columnNumber = " + locator.getColumnNumber()
                 + ", systemId = " + locator.getSystemId()
                 + ". publicId = " + locator.getPublicId()
                 + ")"
                 );
     }

     /** Prints the start-of-document marker, followed by a line break. */
     @Override
     public void startDocument() throws SAXException {
         System.out.println(this.toBlankString(frontBlankCount) + ">>> start document");
     }

     /** Prints the end-of-document marker without a trailing line break. */
     @Override
     public void endDocument() throws SAXException {
         System.out.print(this.toBlankString(frontBlankCount) + ">>> end document");
     }

     /**
      * Prints a namespace declaration coming into scope, on a fresh line.
      *
      * @param prefix namespace prefix being declared
      * @param uri    namespace URI the prefix is bound to
      */
     @Override
     public void startPrefixMapping(String prefix, String uri) throws SAXException {
         // The leading "\n" terminates whatever event was printed before without a line break.
         System.out.println("\n" + this.toBlankString(this.frontBlankCount)
                 + ">>> start prefix_mapping : xmlns:" + prefix + "=\"" + uri + "\"");
     }

     /**
      * Prints a namespace prefix going out of scope, on a fresh line.
      *
      * @param prefix namespace prefix whose mapping ends
      */
     @Override
     public void endPrefixMapping(String prefix) throws SAXException {
         System.out.print("\n" + this.toBlankString(this.frontBlankCount) + ">>> end prefix_mapping : " + prefix);
     }

     /**
      * Prints the opening of an element at the current depth, then increases the depth.
      *
      * @param uri       namespace URI of the element
      * @param localName local name (not printed)
      * @param qName     qualified name as written in the document
      * @param atts      attributes of the element (not printed)
      */
     @Override
     public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
         // Post-increment: the element itself is printed at the parent's depth.
         System.out.print(this.toBlankString(this.frontBlankCount++) + ">>> start element : "
                     + qName + "(" + uri + ")"
                     );
     }

     /**
      * Decreases the depth, then prints the closing of an element at that depth.
      *
      * @param uri       namespace URI of the element
      * @param localName local name (not printed)
      * @param qName     qualified name as written in the document
      */
     @Override
     public void endElement(String uri, String localName, String qName) throws SAXException {
         // Pre-decrement: the end tag lines up with its matching start tag.
         System.out.print(this.toBlankString(--this.frontBlankCount) + ">>> end element : "
                 + qName + "(" + uri + ")"
                 );
     }

     /**
      * Prints a text node: a tab, the reported length, and the escaped text, followed by a
      * line break.
      *
      * @param ch     buffer containing the text
      * @param start  index of the first character of the text
      * @param length number of characters in the text
      */
     @Override
     public void characters(char[] ch, int start, int length) throws SAXException {
         System.out.println("\t>>> characters(" + length + "): " + escape(ch, start, length));
     }

     /**
      * Prints ignorable whitespace at the current depth, escaped like regular text, without a
      * trailing line break.
      *
      * @param ch     buffer containing the whitespace
      * @param start  index of the first whitespace character
      * @param length number of whitespace characters
      */
     @Override
     public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
         System.out.print(this.toBlankString(frontBlankCount)
                 + ">>> ignorable whitespace(" + length + "): " + escape(ch, start, length));
     }

     /**
      * Prints a processing instruction with its target and data in double quotes.
      *
      * @param target processing instruction target
      * @param data    processing instruction data
      */
     @Override
     public void processingInstruction(String target, String data) throws SAXException {
         System.out.print(this.toBlankString(this.frontBlankCount)
                 + ">>> process instruction : (target = \"" + target + "\", data = \"" + data + "\")");
     }

     /**
      * Prints the name of an entity the parser skipped.
      *
      * @param name name of the skipped entity
      */
     @Override
     public void skippedEntity(String name) throws SAXException {
         System.out.print(this.toBlankString(this.frontBlankCount) + ">>> skipped_entity : " + name);
     }
 }
131 
132 public class SaxTest {
133     
134     
135     @Test
136     public void test() throws SAXException, FileNotFoundException, IOException {
137         
138         
139         XMLReader reader = XMLReaderFactory.createXMLReader(); 
140         reader.setFeature("http://xml.org/sax/features/validation", true);
141         reader.setFeature("http://xml.org/sax/features/namespaces", true);
142         reader.setContentHandler(new MyContentHandler());
143         reader.parse(new InputSource(new FileInputStream("src/sax/parsing/example/books.xml")));
144         
145     }
146 }

输出结果:

>>> set document_locator : (lineNumber = 1, columnNumber = 1, systemId = null. publicId = null)
>>> start document
[Error] :5:10: Document is invalid: no grammar found.
[Error] :5:10: Document root element "bk:books", must match DOCTYPE root "null".

>>> start prefix_mapping : xmlns:bk="http://test.org/books"
>>> start element : bk:books(http://test.org/books) >>> characters(2):
  >>> start element : bk:book(http://test.org/books) >>> characters(3):
    >>> start element : bk:name(http://test.org/books) >>> characters(16): Thinking in JAVA
    >>> end element : bk:name(http://test.org/books) >>> characters(2):
  >>> end element : bk:book(http://test.org/books) >>> characters(2):
  >>> start element : bk:book(http://test.org/books) >>> characters(3):
    >>> start element : bk:name(http://test.org/books) >>> characters(10): Core JAVA2
    >>> end element : bk:name(http://test.org/books) >>> characters(2):
  >>> end element : bk:book(http://test.org/books) >>> characters(2):
  >>> start element : bk:book(http://test.org/books) >>> characters(3):
    >>> start element : bk:name(http://test.org/books) >>> characters(10): C++ primer
    >>> end element : bk:name(http://test.org/books) >>> characters(3):

    >>> start prefix_mapping : xmlns:addr="http://test.org/address"
    >>> start element : addr:address(http://test.org/address) >>> characters(4):
      >>> start element : addr:state(http://test.org/address) >>> characters(5): China
      >>> end element : addr:state(http://test.org/address) >>> characters(4):
      >>> start element : addr:city(http://test.org/address) >>> characters(8): ShangHai
      >>> end element : addr:city(http://test.org/address) >>> characters(3):
    >>> end element : addr:address(http://test.org/address)
    >>> end prefix_mapping : addr >>> characters(2):
  >>> end element : bk:book(http://test.org/books) >>> characters(1):
>>> end element : bk:books(http://test.org/books)
>>> end prefix_mapping : bk>>> end document

原文地址:https://www.cnblogs.com/asnjudy/p/4236403.html