用java解析在OpenStreetMap上下载的地图数据(SAX版,适合比较大的xml文件)

java程序如下:

package gao.map.preprocess;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.helpers.DefaultHandler;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * 处理从OpenStreetMap下载的原始数据,将抽取的数据输出为text文件
 * @author Administrator
 *
 */
public class OpenStreetMap {
    static String s = null;
    //实现自己的解析方式
    static class MySAXHandler extends DefaultHandler {

        FileOutputStream fosPoint;
        FileOutputStream fosArc;
        OutputStreamWriter oswPoint;
        OutputStreamWriter oswArc;
        BufferedWriter bwPoint;
        BufferedWriter bwArc;
        //开始解析时调用
        public void startDocument() throws SAXException {
            // 点信息
            File pointFile = new File("D:\项目\mongo\MapPre\Point.txt");
            // 弧信息
            File arcFile = new File("D:\项目\mongo\MapPre\Arc.txt");
            try {
                fosPoint = new FileOutputStream(pointFile);
                fosArc = new FileOutputStream(arcFile);
            } catch (FileNotFoundException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            oswPoint = new OutputStreamWriter(fosPoint);
            oswArc = new OutputStreamWriter(fosArc);
            bwPoint = new BufferedWriter(oswPoint);
            bwArc = new BufferedWriter(oswArc);
            System.out.println("开始解析文档!");
        }

        //完成解析时调用
        public void endDocument() throws SAXException {
           try {
            bwPoint.close();
               oswPoint.close();
               fosPoint.close();
               bwArc.close();
               oswArc.close();
               fosArc.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
            System.out.println("文档解析完成!");
        }

        /**
         * 开始一个元素的解析
         * @param uri
         * @param localName
         * @param qName 标签名
         * @param attributes 属性
         * @throws SAXException
         */
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            if ((attributes != null)&&attributes.getLength() > 0) {
                if(qName.equals("node")){
                    StringBuilder sb = new StringBuilder();
                    sb.append(attributes.getValue("id")+"      ");
                    sb.append(attributes.getValue("lat")+"      ");
                    sb.append(attributes.getValue("lon"));
                    try {
                        bwPoint.write(sb.toString()+"
");
                        bwPoint.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                    System.out.println(sb.toString());
                }else if(qName.equals("way")){
                    s = attributes.getValue("id")+"      "
                            +attributes.getValue("version")+"      ";
                }else if(qName.equals("nd")){
                    if(s==null)
                        return;
                    try {
                        bwArc.write(s+attributes.getValue("ref")+"      
");
                        bwArc.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                    System.out.println(s+attributes.getValue("ref")+"      ");
                }else if(qName.equals("tag")){
                    if(s==null)
                        return;
                    try {
                        bwArc.write(s+"      "+attributes.getValue("k")+"
");
                        bwArc.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                    System.out.println(s+"      "+attributes.getValue("k"));
                }else if(qName.equals("relation")){
                    if(s!=null)
                        s = null;
                }
            }
        }

        /**
         * 结束一个元素的解析,遇到结束标签时调用此方法 通常在此方法对标签取值并处理
         * @param uri
         * @param localName
         * @param qName
         * @throws SAXException
         */
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            
        }

        //该方法是获得元素间的text文本内容,可以通过new String(ch, start, length)来获得
        public void characters(char[] ch, int start, int length)
                throws SAXException {
//            System.out.print(new String(ch, start, length));
        }
    }
         
    public static void main(String[] args) {
        SAXParserFactory saxfac = SAXParserFactory.newInstance();
        try {
            SAXParser saxparser = saxfac.newSAXParser();
            InputStream is = new FileInputStream("D:\项目\map");
            MySAXHandler handler = new MySAXHandler();
            saxparser.parse(is, handler);;
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (SAXException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

   }
}

参考链接:

四种生成和解析XML文档的方法详解(介绍+优缺点比较+示例)

快速有效的解析大型XML文件

 

 详解Java解析XML的四种方法—DOM/SAX/jdom/dom4j

原文地址:https://www.cnblogs.com/gaopeng527/p/5129396.html