案例

student.xml

<?xml version="1.0" encoding="UTF-8" ?>
 <students>
     <!-- Sample data for the Jsoup demos, which load this file from the classpath as "student.xml". -->
     <!-- Each student is identified by its "number" attribute and has name, age and sex children. -->

     <!-- Only this first student's <name> carries an "id" attribute. -->
     <student number="heima_0001">
         <name id="1">tom</name>
         <age>18</age>
         <sex>male</sex>
     </student>

    <!-- This student's <name> has no "id" attribute. -->
    <student number="heima_0002">
        <name>jack</name>
        <age>18</age>
        <sex>female</sex>
    </student>

 </students>

Jsoup案例

package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * @author 旗木五五开
 * @create 2020-02-12 0:09
 * Jsoup对象功能
 */
public class JsoupDemo2 {
    public static void main(String[] args) throws IOException {
//
//        获取文件路径
        String path = cn.itcast.xml.jsoup.JsoupDemo1.class.getClassLoader().getResource("student.xml").getPath();
//        1.parse​(File in, String charsetName):解析xml文档,加载文档进内存,获取dom树-->Document对象
        Document document1 = Jsoup.parse(new File(path), "utf-8");
        System.out.println(document1);
//        2.parse​(String html):解析HTML或XML字符串的
        String str ="<?xml version="1.0" encoding="UTF-8" ?>
" +
                " <students>
" +
                "
" +
                " 	<student number="heima_0001">
" +
                " 		<name>tom</name>
" +
                " 		<age>18</age>8
" +
                " 		<sex>male</sex>
" +
                " 	</student>
" +
                "
" +
                "	<student number="heima_0002">
" +
                "		<name>jack</name>
" +
                "		<age>18</age>
" +
                "		<sex>female</sex>
" +
                "	</student>
" +
                "
" +
                " </students>";
        Document document2 = Jsoup.parse(str);
        System.out.println(document2);
//        3.parse​(URL url, int timeoutMillis):通过网络路径获取指定的HTML或者XML的文档对象
        URL url=new URL("https://baike.baidu.com/item/jsoup/9012509");//代表网络中的一个资源路径
        Document document3 = Jsoup.parse(url, 10000);//10秒超时,不再访问
        System.out.println(document3);
    }
}

Document案例

package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;

/**
 * Demonstrates the element-lookup methods of {@link Document}: by tag name, by attribute name,
 * by attribute value, and by id.
 *
 * @author 旗木五五开
 * @create 2020-02-12 13:15
 */
public class JsoupDemo3 {
    /** Name of the sample document, looked up on the classpath. */
    private static final String STUDENT_XML = "student.xml";

    /**
     * Parses the sample document and prints the result of each lookup.
     *
     * @param args unused
     * @throws IOException if the sample file is missing or cannot be read
     */
    public static void main(String[] args) throws IOException {
        // 1-2. Locate the sample file and parse it into a Document.
        Document document = Jsoup.parse(locateResource(STUDENT_XML), "utf-8");

        // 3.1 All elements whose tag is "student".
        Elements students = document.getElementsByTag("student");
        System.out.println(students);
        System.out.println("----------");

        // 3.2 All elements that carry an "id" attribute, whatever its value.
        Elements withIdAttribute = document.getElementsByAttribute("id");
        System.out.println(withIdAttribute);
        System.out.println("-----------");

        // 3.3 All elements whose "number" attribute equals "heima_0001".
        Elements numberedFirst = document.getElementsByAttributeValue("number", "heima_0001");
        System.out.println(numberedFirst);
        System.out.println("-------");

        // 3.4 The single element whose id is "1".
        Element elementWithId1 = document.getElementById("1");
        System.out.println(elementWithId1);
    }

    /**
     * Resolves a classpath resource to a file on disk.
     *
     * <p>Goes through {@code URL.toURI()} rather than {@code URL.getPath()} because the latter
     * returns the path still URL-encoded, which breaks when a directory name contains spaces
     * or non-ASCII characters.
     *
     * @param name resource name relative to the classpath root
     * @return the file backing the resource
     * @throws IOException if the resource does not exist or is not a plain file
     */
    private static File locateResource(String name) throws IOException {
        java.net.URL location = JsoupDemo3.class.getClassLoader().getResource(name);
        if (location == null) {
            throw new IOException("Resource not found on classpath: " + name);
        }
        try {
            return new File(location.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            throw new IOException("Resource is not a plain file: " + location, e);
        }
    }
}

Element案例

package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;

/**
 * Demonstrates {@link Element} features: scoped tag lookup, attribute access, and reading
 * text versus inner HTML.
 *
 * @author 旗木五五开
 * @create 2020-02-13 12:02
 */
public class JsoupDemo4 {
    /** Name of the sample document, looked up on the classpath. */
    private static final String STUDENT_XML = "student.xml";

    /**
     * Parses the sample document and prints the result of each element operation.
     *
     * @param args unused
     * @throws IOException if the sample file is missing or cannot be read
     */
    public static void main(String[] args) throws IOException {
        // 1-2. Locate the sample file and parse it into a Document.
        Document document = Jsoup.parse(locateResource(STUDENT_XML), "utf-8");

        // Looking up "name" on the Document finds every <name> in the file
        // (2 for the sample file).
        Elements allNames = document.getElementsByTag("name");
        System.out.println(allNames.size());
        System.out.println("--------------");

        // Looking up "name" on a single <student> element is scoped to that element.
        Element firstStudent = document.getElementsByTag("student").get(0);
        Elements firstStudentNames = firstStudent.getElementsByTag("name");
        // Prints 1 for the sample file: only the <name> inside the first student.
        System.out.println(firstStudentNames.size());
        System.out.println("--------------");

        // Read the value of the student's "number" attribute.
        String number = firstStudent.attr("number");
        System.out.println(number);
        System.out.println("--------------");

        // text() yields the text content; html() yields the inner markup.
        String text = firstStudentNames.text();
        String html = firstStudentNames.html();
        System.out.println(text);
        System.out.println(html);
    }

    /**
     * Resolves a classpath resource to a file on disk.
     *
     * <p>Goes through {@code URL.toURI()} rather than {@code URL.getPath()} because the latter
     * returns the path still URL-encoded, which breaks when a directory name contains spaces
     * or non-ASCII characters.
     *
     * @param name resource name relative to the classpath root
     * @return the file backing the resource
     * @throws IOException if the resource does not exist or is not a plain file
     */
    private static File locateResource(String name) throws IOException {
        java.net.URL location = JsoupDemo4.class.getClassLoader().getResource(name);
        if (location == null) {
            throw new IOException("Resource not found on classpath: " + name);
        }
        try {
            return new File(location.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException e) {
            throw new IOException("Resource is not a plain file: " + location, e);
        }
    }
}
原文地址:https://www.cnblogs.com/rijiyuelei/p/12405004.html