通过w3c方式 读取xml内容

/**
     * Reads selected element values from an XML file using the W3C DOM API.
     *
     * <p>For each requested tag name, the first matching descendant of the
     * document's root element is located and the node value of its first child
     * is copied into the corresponding {@code WebMagic} property. The recognised
     * tag names are {@code starturl}, {@code regexstarturl}, {@code labelName},
     * {@code labeltype} and {@code regexdescendants}; any other name is looked up
     * and printed but not stored.
     *
     * <p>Reading is best-effort: a tag that is absent from the document, or that
     * has no child node, is skipped and the remaining tags are still processed.
     * Parsing or I/O failures are reported on standard output and whatever has
     * been populated so far is returned; this method does not throw.
     *
     * @param lablenames     tag names of the elements to read; {@code null} yields
     *                       an unpopulated result
     * @param file_path_name path of the XML file to parse; {@code null} yields an
     *                       unpopulated result
     * @return a {@code WebMagic} holding the values that could be read, never
     *         {@code null}
     */
    public static WebMagic readXML(List<String> lablenames, String file_path_name) {

        WebMagic webMagic = new WebMagic();

        // Nothing to read from or nothing requested: return the empty holder.
        if (lablenames == null || file_path_name == null) {
            return webMagic;
        }

        try {
            // NOTE(review): the parser runs with default settings; if this file
            // can ever come from an untrusted source, DTD/external-entity
            // processing should be disabled on the factory.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            org.w3c.dom.Document document = builder.parse(new File(file_path_name));
            org.w3c.dom.Element rootElement = document.getDocumentElement();

            for (String lablename : lablenames) {
                NodeList list = rootElement.getElementsByTagName(lablename);

                // item(0) is null when the tag does not occur in the document;
                // skip it instead of aborting the whole read with an NPE.
                org.w3c.dom.Element element = (org.w3c.dom.Element) list.item(0);
                if (element == null) {
                    continue;
                }

                // An empty element (e.g. <starturl/>) has no first child.
                org.w3c.dom.Node firstChild = element.getFirstChild();
                if (firstChild == null) {
                    continue;
                }

                String value = firstChild.getNodeValue();
                System.out.println(value);

                String nodeName = element.getNodeName();
                if ("starturl".equals(nodeName)) {
                    webMagic.setStarturl(value);
                } else if ("regexstarturl".equals(nodeName)) {
                    webMagic.setRegexstarturl(value);
                } else if ("labelName".equals(nodeName)) {
                    webMagic.setLabelName(value);
                } else if ("labeltype".equals(nodeName)) {
                    webMagic.setLabeltype(value);
                } else if ("regexdescendants".equals(nodeName)) {
                    webMagic.setRegexdescendants(value);
                }
            }

        } catch (Exception e) {
            // Best-effort contract: report and return what was read so far.
            // Print the exception itself (type + message) because getMessage()
            // alone is frequently null and hides the cause.
            System.out.println("exception:" + e);
        }
        return webMagic;

    }
原文地址:https://www.cnblogs.com/baihaojie/p/5783219.html